diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 9f389d8b4..c54df2e90 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -34,7 +34,9 @@ jobs:
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
         if: github.event_name == 'pull_request'
-      - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
+      - run: |
+          set -eo pipefail # Without pipefail, tee's exit status would mask a ci-util failure
+          python3 ci/ci-util.py generate-matrix | tee -a "$GITHUB_OUTPUT"
         id: script
 
   test:
@@ -50,7 +52,6 @@ jobs:
           os: ubuntu-24.04-arm
         - target: aarch64-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
           build_only: 1
         - target: arm-unknown-linux-gnueabi
           os: ubuntu-24.04
@@ -70,8 +71,12 @@ jobs:
           os: ubuntu-24.04
         - target: powerpc64le-unknown-linux-gnu
           os: ubuntu-24.04
+        - target: powerpc64le-unknown-linux-gnu
+          os: ubuntu-24.04-ppc64le
         - target: riscv64gc-unknown-linux-gnu
           os: ubuntu-24.04
+        - target: s390x-unknown-linux-gnu
+          os: ubuntu-24.04-s390x
         - target: thumbv6m-none-eabi
           os: ubuntu-24.04
         - target: thumbv7em-none-eabi
@@ -88,10 +93,8 @@ jobs:
           os: macos-13
         - target: i686-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
         - target: x86_64-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
         - target: i686-pc-windows-gnu
           os: windows-2025
           channel: nightly-i686-gnu
@@ -102,14 +105,24 @@ jobs:
     needs: [calculate_vars]
     env:
       BUILD_ONLY: ${{ matrix.build_only }}
-      TEST_VERBATIM: ${{ matrix.test_verbatim }}
       MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
     steps:
+    - name: Print $HOME
+      shell: bash
+      run: |
+        set -x
+        echo "${HOME:-not found}"
+        pwd
+        printenv
     - name: Print runner information
       run: uname -a
+
+    # Native ppc and s390x runners don't have rustup by default
+    - name: Install rustup
+      if: matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x'
+      run: sudo apt-get update && sudo apt-get install -y rustup
+
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     - name: Install Rust (rustup)
       shell: bash
       run: |
@@ -119,7 +132,12 @@ jobs:
         rustup update "$channel" --no-self-update
         rustup default "$channel"
         rustup target add "${{ matrix.target }}"
+
+    # Our scripts use nextest if possible. This is skipped on the native ppc
+    # and s390x runners since install-action doesn't support them.
     - uses: taiki-e/install-action@nextest
+      if: "!(matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x')"
+
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}
@@ -147,6 +165,10 @@ jobs:
     - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV"
       shell: bash
 
+    - name: Download musl source
+      run: ./ci/update-musl.sh
+      shell: bash
+
     - name: Verify API list
       if: matrix.os == 'ubuntu-24.04'
       run: python3 etc/update-api-list.py --check
@@ -182,8 +204,6 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     # Unlike rustfmt, stable clippy does not work on code with nightly features.
     - name: Install nightly `clippy`
       run: |
@@ -191,8 +211,29 @@ jobs:
         rustup default nightly
         rustup component add clippy
     - uses: Swatinem/rust-cache@v2
+    - name: Download musl source
+      run: ./ci/update-musl.sh
     - run: cargo clippy --workspace --all-targets
 
+  build-custom:
+    name: Build custom target
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+        rustup component add rust-src
+    - uses: Swatinem/rust-cache@v2
+    - run: |
+        # Ensure we can build with custom target.json files (these can interact
+        # poorly with build scripts)
+        cargo build -p compiler_builtins -p libm \
+          --target etc/thumbv7em-none-eabi-renamed.json \
+          -Zbuild-std=core
+
   benchmarks:
     name: Benchmarks
     timeout-minutes: 20
@@ -205,8 +246,6 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@master
-      with:
-        submodules: true
     - uses: taiki-e/install-action@cargo-binstall
 
     - name: Set up dependencies
@@ -223,6 +262,8 @@ jobs:
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}
+    - name: Download musl source
+      run: ./ci/update-musl.sh
 
     - name: Run icount benchmarks
       env:
@@ -235,7 +276,7 @@ jobs:
       with:
         name: ${{ env.BASELINE_NAME }}
         path: ${{ env.BASELINE_NAME }}.tar.xz
-    
+
     - name: Run wall time benchmarks
       run: |
         # Always use the same seed for benchmarks. Ideally we should switch to a
@@ -256,8 +297,6 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     - name: Install Rust (rustup)
       run: rustup update nightly --no-self-update && rustup default nightly
       shell: bash
@@ -292,10 +331,8 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
-    - name: Install stable `rustfmt`
-      run: rustup set profile minimal && rustup default stable && rustup component add rustfmt
+    - name: Install nightly `rustfmt`
+      run: rustup set profile minimal && rustup default nightly && rustup component add rustfmt
     - run: cargo fmt -- --check
 
   extensive:
@@ -317,13 +354,13 @@ jobs:
       TO_TEST: ${{ matrix.to_test }}
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: true
       - name: Install Rust
         run: |
           rustup update nightly --no-self-update
           rustup default nightly
       - uses: Swatinem/rust-cache@v2
+      - name: Download musl source
+        run: ./ci/update-musl.sh
       - name: Run extensive tests
         run: ./ci/run-extensive.sh
       - name: Print test logs if available
@@ -333,6 +370,7 @@ jobs:
   success:
     needs:
       - benchmarks
+      - build-custom
       - clippy
       - extensive
       - miri
diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml
new file mode 100644
index 000000000..ad7693e17
--- /dev/null
+++ b/.github/workflows/rustc-pull.yml
@@ -0,0 +1,24 @@
+# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
+name: rustc-pull
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Run at 04:00 UTC every Monday and Thursday
+    - cron: '0 4 * * 1,4'
+
+jobs:
+  pull:
+    if: github.repository == 'rust-lang/compiler-builtins'
+    uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
+    with:
+      github-app-id: ${{ vars.APP_CLIENT_ID }}
+      # Zulip channel and topic used below: https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375
+      zulip-stream-id: 219381
+      zulip-topic: 'compiler-builtins subtree sync automation'
+      zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com"
+      pr-base-branch: master
+      branch-name: rustc-pull
+    secrets:
+      zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
+      github-app-secret: ${{ secrets.APP_PRIVATE_KEY }}
diff --git a/.gitignore b/.gitignore
index 5287a6c72..f12b871c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,6 @@ iai-home
 *.bk
 *.rs.bk
 .#*
+
+# Manually managed
+crates/musl-math-sys/musl
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 792ed9ab2..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,4 +0,0 @@
-[submodule "crates/musl-math-sys/musl"]
-	path = crates/musl-math-sys/musl
-	url = https://git.musl-libc.org/git/musl
-	shallow = true
diff --git a/.release-plz.toml b/.release-plz.toml
deleted file mode 100644
index 8023ade9b..000000000
--- a/.release-plz.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[workspace]
-# As part of the release process, we delete `libm/Cargo.toml`. Since
-# this is only run in CI, we shouldn't need to worry about it.
-allow_dirty = true
-publish_allow_dirty = true
-
-[[package]]
-name = "compiler_builtins"
-semver_check = false
-changelog_include = ["libm"] # libm is included as part of builtins
-
-[[package]]
-name = "libm"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9f67cfc31..9ae4f893c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -165,3 +165,12 @@ cargo bench --no-default-features \
 
 [`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
 [Valgrind]: https://valgrind.org/
+
+## Subtree synchronization
+
+`compiler-builtins` is included as a [Josh subtree] in the main compiler
+repository (`rust-lang/rust`). A guide on how to create synchronization
+(pull and push) PRs is available on the [`rustc-dev-guide` page].
+
+[Josh subtree]: https://rustc-dev-guide.rust-lang.org/external-repos.html#josh-subtrees
+[`rustc-dev-guide` page]: https://rustc-dev-guide.rust-lang.org/external-repos.html#synchronizing-a-josh-subtree
diff --git a/Cargo.toml b/Cargo.toml
index bc6b4bd29..956d738f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,8 @@
 [workspace]
 resolver = "2"
 members = [
+    "builtins-shim",
     "builtins-test",
-    "compiler-builtins",
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
@@ -13,8 +13,8 @@ members = [
 ]
 
 default-members = [
+    "builtins-shim",
     "builtins-test",
-    "compiler-builtins",
     "crates/libm-macros",
     "libm",
     "libm-test",
@@ -25,6 +25,10 @@ exclude = [
     # and `mangled-names` disabled, which is the opposite of what is needed for
     # other tests, so it makes sense to keep it out of the workspace.
     "builtins-test-intrinsics",
+    # We test via the `builtins-shim` crate, so exclude the `compiler-builtins`
+    # that has a dependency on `core`. See `builtins-shim/Cargo.toml` for more
+    # details.
+    "compiler-builtins",
 ]
 
 [profile.release]
diff --git a/builtins-shim/Cargo.toml b/builtins-shim/Cargo.toml
new file mode 100644
index 000000000..707ebdbc7
--- /dev/null
+++ b/builtins-shim/Cargo.toml
@@ -0,0 +1,64 @@
+# NOTE: Must be kept in sync with `../compiler-builtins/Cargo.toml`.
+#
+# The manifest at `../compiler-builtins` is what actually gets used in the
+# rust-lang/rust tree; however, we can't build it out of tree because it
+# depends on `core` by path, and even optional Cargo dependencies need to be
+# available at build time. So, we work around this by having this "shim"
+# manifest that is identical except for the `core` dependency and forwards
+# to the same sources, which acts as the `compiler-builtins` Cargo entrypoint
+# for out-of-tree testing.
+
+[package]
+name = "compiler_builtins"
+version = "0.1.160"
+authors = ["Jorge Aparicio <japaricious@gmail.com>"]
+description = "Compiler intrinsics used by the Rust compiler."
+repository = "https://github.com/rust-lang/compiler-builtins"
+license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
+edition = "2024"
+publish = false
+links = "compiler-rt"
+
+build = "../compiler-builtins/build.rs"
+
+[lib]
+path = "../compiler-builtins/src/lib.rs"
+bench = false
+doctest = false
+test = false
+
+[build-dependencies]
+cc = { optional = true, version = "1.2" }
+
+[features]
+default = ["compiler-builtins"]
+
+# Enable compilation of C code in compiler-rt, filling in some more optimized
+# implementations and also filling in unimplemented intrinsics
+c = ["dep:cc"]
+
+# For implementations where there is both a generic version and a platform-
+# specific version, use the generic version. This is meant to enable testing
+# the generic versions on all platforms.
+no-asm = []
+
+# Workaround for codegen backends which haven't yet implemented `f16` and
+# `f128` support. Disabled any intrinsics which use those types.
+no-f16-f128 = []
+
+# Flag this library as the unstable compiler-builtins lib
+compiler-builtins = []
+
+# Generate memory-related intrinsics like memcpy
+mem = []
+
+# Mangle all names so this can be linked in with other versions or other
+# compiler-rt implementations. Also used for testing
+mangled-names = []
+
+# Only used in the compiler's build system
+rustc-dep-of-std = ["compiler-builtins"]
+
+# This makes certain traits and function specializations public that
+# are not normally public but are required by the `builtins-test`
+unstable-public-internals = []
diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml
index 704de20c5..e73a1f7b1 100644
--- a/builtins-test-intrinsics/Cargo.toml
+++ b/builtins-test-intrinsics/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]}
+compiler_builtins = { path = "../builtins-shim", features = ["compiler-builtins"] }
 panic-handler = { path = "../crates/panic-handler" }
 
 [features]
diff --git a/builtins-test-intrinsics/build.rs b/builtins-test-intrinsics/build.rs
index 89b126ff2..b82581262 100644
--- a/builtins-test-intrinsics/build.rs
+++ b/builtins-test-intrinsics/build.rs
@@ -6,6 +6,5 @@ fn main() {
     println!("cargo::rerun-if-changed=../configure.rs");
 
     let target = builtins_configure::Target::from_env();
-    builtins_configure::configure_f16_f128(&target);
     builtins_configure::configure_aliases(&target);
 }
diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index 66744a081..b9d19ea77 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -40,11 +40,7 @@ mod intrinsics {
         x as f64
     }
 
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     pub fn extendhftf(x: f16) -> f128 {
         x as f128
     }
@@ -201,11 +197,7 @@ mod intrinsics {
 
     /* f128 operations */
 
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     pub fn trunctfhf(x: f128) -> f16 {
         x as f16
     }
@@ -220,50 +212,32 @@ mod intrinsics {
         x as f64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfsi(x: f128) -> i32 {
         x as i32
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfdi(x: f128) -> i64 {
         x as i64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfti(x: f128) -> i128 {
         x as i128
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfsi(x: f128) -> u32 {
         x as u32
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfdi(x: f128) -> u64 {
         x as u64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfti(x: f128) -> u128 {
         x as u128
     }
@@ -540,47 +514,25 @@ fn run() {
     bb(extendhfdf(bb(2.)));
     #[cfg(f16_enabled)]
     bb(extendhfsf(bb(2.)));
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     bb(extendhftf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(extendsftf(bb(2.)));
     bb(fixdfti(bb(2.)));
     bb(fixsfti(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfdi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfsi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfti(bb(2.)));
     bb(fixunsdfti(bb(2.)));
     bb(fixunssfti(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfdi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfsi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfti(bb(2.)));
     #[cfg(f128_enabled)]
     bb(floatditf(bb(2)));
@@ -616,11 +568,7 @@ fn run() {
     bb(truncsfhf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(trunctfdf(bb(2.)));
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     bb(trunctfhf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(trunctfsf(bb(2.)));
diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index 10978c0bb..00a9d8579 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -10,19 +10,19 @@ license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential
 # problems with system RNGs on the variety of platforms this crate is tested on.
 # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
-rand_xoshiro = "0.6"
+rand_xoshiro = "0.7"
 # To compare float builtins against
-rustc_apfloat = "0.2.1"
+rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.14.0", optional = true }
+iai-callgrind = { version = "0.15.2", optional = true }
 
 [dependencies.compiler_builtins]
-path = "../compiler-builtins"
+path = "../builtins-shim"
 default-features = false
 features = ["unstable-public-internals"]
 
 [dev-dependencies]
-criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] }
 paste = "1.0.15"
 
 [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs
index 87a89efb5..da29b5d31 100644
--- a/builtins-test/benches/float_cmp.rs
+++ b/builtins-test/benches/float_cmp.rs
@@ -177,6 +177,7 @@ float_bench! {
     ],
 }
 
+#[cfg(f128_enabled)]
 float_bench! {
     name: cmp_f128_gt,
     sig: (a: f128, b: f128) -> CmpResult,
@@ -189,6 +190,7 @@ float_bench! {
     asm: []
 }
 
+#[cfg(f128_enabled)]
 float_bench! {
     name: cmp_f128_unord,
     sig: (a: f128, b: f128) -> CmpResult,
diff --git a/builtins-test/benches/float_conv.rs b/builtins-test/benches/float_conv.rs
index d4a7346d1..e0f488eb6 100644
--- a/builtins-test/benches/float_conv.rs
+++ b/builtins-test/benches/float_conv.rs
@@ -365,7 +365,6 @@ float_bench! {
 
 /* float -> unsigned int */
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u32,
     sig: (a: f32) -> u32,
@@ -387,7 +386,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u64,
     sig: (a: f32) -> u64,
@@ -409,7 +407,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u128,
     sig: (a: f32) -> u128,
@@ -505,7 +502,6 @@ float_bench! {
 
 /* float -> signed int */
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i32,
     sig: (a: f32) -> i32,
@@ -527,7 +523,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i64,
     sig: (a: f32) -> i64,
@@ -549,7 +544,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i128,
     sig: (a: f32) -> i128,
@@ -666,9 +660,6 @@ pub fn float_conv() {
     conv_f64_i128(&mut criterion);
 
     #[cfg(f128_enabled)]
-    // FIXME: ppc64le has a sporadic overflow panic in the crate functions
-    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
     {
         conv_u32_f128(&mut criterion);
         conv_u64_f128(&mut criterion);
diff --git a/builtins-test/benches/float_extend.rs b/builtins-test/benches/float_extend.rs
index fc44e80c9..939dc60f9 100644
--- a/builtins-test/benches/float_extend.rs
+++ b/builtins-test/benches/float_extend.rs
@@ -110,9 +110,7 @@ float_bench! {
 pub fn float_extend() {
     let mut criterion = Criterion::default().configure_from_args();
 
-    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
     #[cfg(f16_enabled)]
-    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     {
         extend_f16_f32(&mut criterion);
         extend_f16_f64(&mut criterion);
diff --git a/builtins-test/benches/float_trunc.rs b/builtins-test/benches/float_trunc.rs
index 43310c7cf..9373f945b 100644
--- a/builtins-test/benches/float_trunc.rs
+++ b/builtins-test/benches/float_trunc.rs
@@ -121,9 +121,7 @@ float_bench! {
 pub fn float_trunc() {
     let mut criterion = Criterion::default().configure_from_args();
 
-    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
     #[cfg(f16_enabled)]
-    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     {
         trunc_f32_f16(&mut criterion);
         trunc_f64_f16(&mut criterion);
@@ -133,11 +131,8 @@ pub fn float_trunc() {
 
     #[cfg(f128_enabled)]
     {
-        // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
         #[cfg(f16_enabled)]
-        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
         trunc_f128_f16(&mut criterion);
-
         trunc_f128_f32(&mut criterion);
         trunc_f128_f64(&mut criterion);
     }
diff --git a/builtins-test/build.rs b/builtins-test/build.rs
index e8f4eb4dd..5b2dcd12e 100644
--- a/builtins-test/build.rs
+++ b/builtins-test/build.rs
@@ -116,5 +116,4 @@ fn main() {
     }
 
     builtins_configure::configure_aliases(&target);
-    builtins_configure::configure_f16_f128(&target);
 }
diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index 098718567..4bdcf482c 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -17,28 +17,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         "extend_f16_f32",
         "trunc_f32_f16",
         "trunc_f64_f16",
-        // FIXME(#616): re-enable once fix is in nightly
-        // <https://github.com/rust-lang/compiler-builtins/issues/616>
-        "mul_f32",
-        "mul_f64",
     ];
 
-    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
-    // in their benchmark modules due to runtime panics.
-    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];
-
     // FIXME(f16_f128): system symbols have incorrect results
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
     const X86_NO_SSE_SKIPPED: &[&str] = &[
         "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
     ];
 
-    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
-    // uses `compiler-rt` version.
-    // <https://github.com/llvm/llvm-project/issues/91840>
-    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
-
     // FIXME(llvm): system symbols have incorrect results on Windows
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
     const WINDOWS_SKIPPED: &[&str] = &[
@@ -57,19 +43,7 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
-    if cfg!(all(target_arch = "powerpc64", target_endian = "little"))
-        && PPC64LE_SKIPPED.contains(&test_name)
-    {
-        return true;
-    }
-
-    if cfg!(all(target_arch = "x86", not(target_feature = "sse")))
-        && X86_NO_SSE_SKIPPED.contains(&test_name)
-    {
-        return true;
-    }
-
-    if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) {
+    if cfg!(x86_no_sse) && X86_NO_SSE_SKIPPED.contains(&test_name) {
         return true;
     }
 
diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs
index 865b9e472..abe7dde64 100644
--- a/builtins-test/tests/addsub.rs
+++ b/builtins-test/tests/addsub.rs
@@ -111,7 +111,7 @@ macro_rules! float_sum {
     }
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_addsub {
     use super::*;
 
@@ -122,7 +122,7 @@ mod float_addsub {
 }
 
 #[cfg(f128_enabled)]
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 mod float_addsub_f128 {
     use super::*;
diff --git a/builtins-test/tests/conv.rs b/builtins-test/tests/conv.rs
index 491915d9b..9b04295d2 100644
--- a/builtins-test/tests/conv.rs
+++ b/builtins-test/tests/conv.rs
@@ -59,32 +59,28 @@ mod i_to_f {
                                 || ((error_minus == error || error_plus == error)
                                     && ((f0.to_bits() & 1) != 0))
                             {
-                                if !cfg!(any(
-                                    target_arch = "powerpc",
-                                    target_arch = "powerpc64"
-                                )) {
-                                    panic!(
-                                        "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
-                                        stringify!($fn),
-                                        x,
-                                        f1.to_bits(),
-                                        y_minus_ulp,
-                                        y,
-                                        y_plus_ulp,
-                                        error_minus,
-                                        error,
-                                        error_plus,
-                                    );
-                                }
+                                panic!(
+                                    "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
+                                    stringify!($fn),
+                                    x,
+                                    f1.to_bits(),
+                                    y_minus_ulp,
+                                    y,
+                                    y_plus_ulp,
+                                    error_minus,
+                                    error,
+                                    error_plus,
+                                );
                             }
                         }
 
-                        // Test against native conversion. We disable testing on all `x86` because of
-                        // rounding bugs with `i686`. `powerpc` also has the same rounding bug.
+                        // Test against native conversion.
+                        // FIXME(x86,ppc): the platform version has rounding bugs on i686 and
+                        // PowerPC64le (for PPC this only shows up in Docker, not the native runner).
+                        // https://github.com/rust-lang/compiler-builtins/pull/384#issuecomment-740413334
                         if !Float::eq_repr(f0, f1) && !cfg!(any(
                             target_arch = "x86",
-                            target_arch = "powerpc",
-                            target_arch = "powerpc64"
+                            all(target_arch = "powerpc64", target_endian = "little")
                         )) {
                             panic!(
                                 "{}({}): std: {:?}, builtins: {:?}",
@@ -118,7 +114,7 @@ mod i_to_f {
         i128, __floattidf;
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
         u32, __floatunsitf;
@@ -129,7 +125,7 @@ mod i_to_f {
         i128, __floattitf;
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
         u32, __floatunsikf;
diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs
index 5ae653cc9..caee4166c 100644
--- a/builtins-test/tests/div_rem.rs
+++ b/builtins-test/tests/div_rem.rs
@@ -138,7 +138,7 @@ macro_rules! float {
     };
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_div {
     use super::*;
 
@@ -147,7 +147,7 @@ mod float_div {
         f64, __divdf3, Double, all();
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     float! {
         f128, __divtf3, Quad,
@@ -156,7 +156,7 @@ mod float_div {
         not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float! {
         f128, __divkf3, Quad, not(feature = "no-sys-f128");
diff --git a/builtins-test/tests/float_pow.rs b/builtins-test/tests/float_pow.rs
index 0e8ae88e8..a17dff27c 100644
--- a/builtins-test/tests/float_pow.rs
+++ b/builtins-test/tests/float_pow.rs
@@ -1,7 +1,7 @@
 #![allow(unused_macros)]
 #![cfg_attr(f128_enabled, feature(f128))]
-#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
 
+#[cfg_attr(x86_no_sse, allow(unused))]
 use builtins_test::*;
 
 // This is approximate because of issues related to
@@ -52,6 +52,7 @@ macro_rules! pow {
     };
 }
 
+#[cfg(not(x86_no_sse))] // FIXME(i586): failure for powidf2
 pow! {
     f32, 1e-4, __powisf2, all();
     f64, 1e-12, __powidf2, all();
diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs
index 53167d98f..5d59fbb7f 100644
--- a/builtins-test/tests/lse.rs
+++ b/builtins-test/tests/lse.rs
@@ -1,5 +1,6 @@
 #![feature(decl_macro)] // so we can use pub(super)
-#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))]
+#![feature(macro_metavar_expr_concat)]
+#![cfg(all(target_arch = "aarch64", target_os = "linux"))]
 
 /// Translate a byte size to a Rust type.
 macro int_ty {
@@ -87,7 +88,7 @@ test_op!(add, |left, right| left.wrapping_add(right));
 test_op!(clr, |left, right| left & !right);
 test_op!(xor, std::ops::BitXor::bitxor);
 test_op!(or, std::ops::BitOr::bitor);
-
+use compiler_builtins::{foreach_bytes, foreach_ordering};
 compiler_builtins::foreach_cas!(cas::test);
 compiler_builtins::foreach_cas16!(test_cas16);
 compiler_builtins::foreach_swp!(swap::test);
diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs
index 58bc9ab4a..3072b45dc 100644
--- a/builtins-test/tests/mul.rs
+++ b/builtins-test/tests/mul.rs
@@ -113,7 +113,7 @@ macro_rules! float_mul {
     };
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_mul {
     use super::*;
 
@@ -126,7 +126,7 @@ mod float_mul {
 }
 
 #[cfg(f128_enabled)]
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 mod float_mul_f128 {
     use super::*;
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 5b6974fe4..12228b9da 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -28,7 +28,7 @@ function run_icount_benchmarks() {
 
     iai_args=(
         "--home" "$(pwd)/$iai_home"
-        "--regression=ir=5.0"
+        "--callgrind-limits=ir=5.0"
         "--save-summary"
     )
 
@@ -46,17 +46,18 @@ function run_icount_benchmarks() {
         shift
     done
 
-    # Run iai-callgrind benchmarks
-    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"
+    # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to
+    # capture rather than exit on error.
+    (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true
+    exit_code="$?"
 
-    # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
-    # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-    if [ -n "${PR_NUMBER:-}" ]; then
-        # If this is for a pull request, ignore regressions if specified.
-        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
-    else
+    if [ "$exit_code" -eq 0 ]; then
+        echo "Benchmarks completed with no regressions"
+    elif [ -z "${PR_NUMBER:-}" ]; then
         # Disregard regressions after merge
-        ./ci/ci-util.py check-regressions --home "$iai_home" || true
+        echo "Benchmarks completed with regressions; ignoring (not in a PR)"
+    else
+        ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER"
     fi
 }
 
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 6c8b43980..c1db17c6c 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -7,11 +7,13 @@
 
 import json
 import os
+import pprint
 import re
 import subprocess as sp
 import sys
 from dataclasses import dataclass
-from glob import glob, iglob
+from functools import cache
+from glob import glob
 from inspect import cleandoc
 from os import getenv
 from pathlib import Path
@@ -38,14 +40,10 @@
 
             Note that `--extract` will overwrite files in `iai-home`.
 
-        check-regressions [--home iai-home] [--allow-pr-override pr_number]
-            Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
-            files and see if there are any regressions. This is used as a workaround
-            for `iai-callgrind` not exiting with error status; see
-            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
-
-            If `--allow-pr-override` is specified, the regression check will not exit
-            with failure if any line in the PR starts with `allow-regressions`.
+        handle-bench-regressions PR_NUMBER
+            Exit with success if the pull request contains a line starting with
+            `ci: allow-regressions`, indicating that regressions in benchmarks should
+            be accepted. Otherwise, exit 1.
     """
 )
 
@@ -54,15 +52,6 @@
 DEFAULT_BRANCH = "master"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
 ARTIFACT_PREFIX = "baseline-icount*"
-# Place this in a PR body to skip regression checks (must be at the start of a line).
-REGRESSION_DIRECTIVE = "ci: allow-regressions"
-# Place this in a PR body to skip extensive tests
-SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
-# Place this in a PR body to allow running a large number of extensive tests. If not
-# set, this script will error out if a threshold is exceeded in order to avoid
-# accidentally spending huge amounts of CI time.
-ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
-MANY_EXTENSIVE_THRESHOLD = 20
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -74,7 +63,7 @@
 
 # libm PR CI takes a long time and doesn't need to run unless relevant files have been
 # changed. Anything matching this regex pattern will trigger a run.
-TRIGGER_LIBM_PR_CI = ".*(libm|musl).*"
+TRIGGER_LIBM_CI_FILE_PAT = ".*(libm|musl).*"
 
 TYPES = ["f16", "f32", "f64", "f128"]
 
@@ -84,6 +73,54 @@ def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
 
+@dataclass(init=False)
+class PrCfg:
+    """Directives that we allow in the PR body to control test behavior.
+
+    These are of the form `ci: foo`, at the start of a line.
+    """
+
+    # Accept benchmark regressions rather than failing the benchmark job.
+    allow_regressions: bool = False
+    # Don't run extensive tests
+    skip_extensive: bool = False
+
+    # Allow running a large number of extensive tests. If not set, this script
+    # will error out if a threshold is exceeded in order to avoid accidentally
+    # spending huge amounts of CI time.
+    allow_many_extensive: bool = False
+
+    # Max number of extensive tests to run by default
+    MANY_EXTENSIVE_THRESHOLD: int = 20
+
+    # Run tests for `libm` that may otherwise be skipped due to no changed files.
+    always_test_libm: bool = False
+
+    # String values of directive names
+    DIR_ALLOW_REGRESSIONS: str = "allow-regressions"
+    DIR_SKIP_EXTENSIVE: str = "skip-extensive"
+    DIR_ALLOW_MANY_EXTENSIVE: str = "allow-many-extensive"
+    DIR_TEST_LIBM: str = "test-libm"
+
+    def __init__(self, body: str):
+        directives = re.finditer(r"^\s*ci:\s*(?P<dir_name>\S*)", body, re.MULTILINE)
+        for dir in directives:
+            name = dir.group("dir_name")
+            if name == self.DIR_ALLOW_REGRESSIONS:
+                self.allow_regressions = True
+            elif name == self.DIR_SKIP_EXTENSIVE:
+                self.skip_extensive = True
+            elif name == self.DIR_ALLOW_MANY_EXTENSIVE:
+                self.allow_many_extensive = True
+            elif name == self.DIR_TEST_LIBM:
+                self.always_test_libm = True
+            else:
+                eprint(f"Found unexpected directive `{name}`")
+                exit(1)
+
+        pprint.pp(self)
+
+
 @dataclass
 class PrInfo:
     """GitHub response for PR query"""
@@ -92,10 +129,21 @@ class PrInfo:
     commits: list[str]
     created_at: str
     number: int
+    cfg: PrCfg
 
     @classmethod
-    def load(cls, pr_number: int | str) -> Self:
-        """For a given PR number, query the body and commit list"""
+    def from_env(cls) -> Self | None:
+        """Create a PR object from the `PR_NUMBER` env var if set, `None` otherwise."""
+        pr_env = os.environ.get("PR_NUMBER")
+        if pr_env is not None and len(pr_env) > 0:
+            return cls.from_pr(pr_env)
+
+        return None
+
+    @classmethod
+    @cache  # Cache so we don't print info messages multiple times
+    def from_pr(cls, pr_number: int | str) -> Self:
+        """For a given PR number, query the body and commit list."""
         pr_info = sp.check_output(
             [
                 "gh",
@@ -108,13 +156,9 @@ def load(cls, pr_number: int | str) -> Self:
             ],
             text=True,
         )
-        eprint("PR info:", json.dumps(pr_info, indent=4))
-        return cls(**json.loads(pr_info))
-
-    def contains_directive(self, directive: str) -> bool:
-        """Return true if the provided directive is on a line in the PR body"""
-        lines = self.body.splitlines()
-        return any(line.startswith(directive) for line in lines)
+        pr_json = json.loads(pr_info)
+        eprint("PR info:", json.dumps(pr_json, indent=4))
+        return cls(**json.loads(pr_info), cfg=PrCfg(pr_json["body"]))
 
 
 class FunctionDef(TypedDict):
@@ -211,26 +255,32 @@ def may_skip_libm_ci(self) -> bool:
         """If this is a PR and no libm files were changed, allow skipping libm
         jobs."""
 
-        if self.is_pr():
-            return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed)
+        # Always run on merge CI
+        if not self.is_pr():
+            return False
+
+        pr = PrInfo.from_env()
+        assert pr is not None, "Is a PR but couldn't load PrInfo"
+
+        # Allow opting in to libm tests
+        if pr.cfg.always_test_libm:
+            return False
 
-        return False
+        # By default, run if there are any changed files matching the pattern
+        return all(not re.match(TRIGGER_LIBM_CI_FILE_PAT, str(f)) for f in self.changed)
 
     def emit_workflow_output(self):
         """Create a JSON object a list items for each type's changed files, if any
         did change, and the routines that were affected by the change.
         """
 
-        pr_number = os.environ.get("PR_NUMBER")
         skip_tests = False
         error_on_many_tests = False
 
-        if pr_number is not None and len(pr_number) > 0:
-            pr = PrInfo.load(pr_number)
-            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
-            error_on_many_tests = not pr.contains_directive(
-                ALLOW_MANY_EXTENSIVE_DIRECTIVE
-            )
+        pr = PrInfo.from_env()
+        if pr is not None:
+            skip_tests = pr.cfg.skip_extensive
+            error_on_many_tests = not pr.cfg.allow_many_extensive
 
             if skip_tests:
                 eprint("Skipping all extensive tests")
@@ -257,16 +307,14 @@ def emit_workflow_output(self):
         may_skip = str(self.may_skip_libm_ci()).lower()
         print(f"extensive_matrix={ext_matrix}")
         print(f"may_skip_libm_ci={may_skip}")
-        eprint(f"extensive_matrix={ext_matrix}")
-        eprint(f"may_skip_libm_ci={may_skip}")
         eprint(f"total extensive tests: {total_to_test}")
 
-        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
+        if error_on_many_tests and total_to_test > PrCfg.MANY_EXTENSIVE_THRESHOLD:
             eprint(
-                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
-                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is"
+                f"More than {PrCfg.MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
+                f" `{PrCfg.DIR_ALLOW_MANY_EXTENSIVE}` to the PR body if this is"
                 " intentional. If this is refactoring that happens to touch a lot of"
-                f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead."
+                f" files, `{PrCfg.DIR_SKIP_EXTENSIVE}` can be used instead."
             )
             exit(1)
 
@@ -365,64 +413,22 @@ def locate_baseline(flags: list[str]) -> None:
     eprint("baseline extracted successfully")
 
 
-def check_iai_regressions(args: list[str]):
-    """Find regressions in iai summary.json files, exit with failure if any are
-    found.
-    """
-
-    iai_home_str = "iai-home"
-    pr_number = None
-
-    while len(args) > 0:
-        match args:
-            case ["--home", home, *rest]:
-                iai_home_str = home
-                args = rest
-            case ["--allow-pr-override", pr_num, *rest]:
-                pr_number = pr_num
-                args = rest
-            case _:
-                eprint(USAGE)
-                exit(1)
-
-    iai_home = Path(iai_home_str)
-
-    found_summaries = False
-    regressions: list[dict] = []
-    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
-        found_summaries = True
-        with open(iai_home / summary_path, "r") as f:
-            summary = json.load(f)
-
-        summary_regs = []
-        run = summary["callgrind_summary"]["callgrind_run"]
-        fname = summary["function_name"]
-        id = summary["id"]
-        name_entry = {"name": f"{fname}.{id}"}
-
-        for segment in run["segments"]:
-            summary_regs.extend(segment["regressions"])
-
-        summary_regs.extend(run["total"]["regressions"])
+def handle_bench_regressions(args: list[str]):
+    """Exit with error unless the PR message contains an ignore directive."""
 
-        regressions.extend(name_entry | reg for reg in summary_regs)
-
-    if not found_summaries:
-        eprint(f"did not find any summary.json files within {iai_home}")
-        exit(1)
+    match args:
+        case [pr_number]:
+            pr_number = pr_number
+        case _:
+            eprint(USAGE)
+            exit(1)
 
-    if len(regressions) == 0:
-        eprint("No regressions found")
+    pr = PrInfo.from_pr(pr_number)
+    if pr.cfg.allow_regressions:
+        eprint("PR allows regressions")
         return
 
-    eprint("Found regressions:", json.dumps(regressions, indent=4))
-
-    if pr_number is not None:
-        pr = PrInfo.load(pr_number)
-        if pr.contains_directive(REGRESSION_DIRECTIVE):
-            eprint("PR allows regressions, returning")
-            return
-
+    eprint("Regressions were found; benchmark failed")
     exit(1)
 
 
@@ -433,8 +439,8 @@ def main():
             ctx.emit_workflow_output()
         case ["locate-baseline", *flags]:
             locate_baseline(flags)
-        case ["check-regressions", *args]:
-            check_iai_regressions(args)
+        case ["handle-bench-regressions", *args]:
+            handle_bench_regressions(args)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()
diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index df71804ba..69b99f5b6 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
index 38ad1a136..2fa6f8520 100644
--- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
+++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
index ffead05d5..85f7335f5 100644
--- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index 9ab49e46e..42511479f 100644
--- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
index d12ced325..35488c477 100644
--- a/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile
index d12ced325..35488c477 100644
--- a/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
index 62b43da9e..e95a1b916 100644
--- a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile
index c02a94672..fd1877603 100644
--- a/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
index 6d8b96069..4e542ce68 100644
--- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
index 7e6ac7c3b..528dfd894 100644
--- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
index 9feadc7b5..257218023 100644
--- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
index 84dcaf47e..cac1f2361 100644
--- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
index b90fd5ec5..76127b7db 100644
--- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
index e6d1d1cd0..da1d56ca6 100644
--- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
@@ -12,6 +12,5 @@ ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
     CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \
     AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
     CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
-    QEMU_CPU=POWER8 \
     QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \
     RUST_TEST_THREADS=1
diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
index eeb4ed019..513efacd6 100644
--- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv6m-none-eabi/Dockerfile
+++ b/ci/docker/thumbv6m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7em-none-eabi/Dockerfile
+++ b/ci/docker/thumbv7em-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7em-none-eabihf/Dockerfile
+++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7m-none-eabi/Dockerfile
+++ b/ci/docker/thumbv7m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index c590adcdd..2ef800129 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index d0122dee5..4c1fe0fe2 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -97,7 +97,7 @@ if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then
     usage: ./ci/run-docker.sh [target]
 
     you can also set DOCKER_BASE_IMAGE to use something other than the default
-    ubuntu:24.04 (or rustlang/rust:nightly).
+    ubuntu:25.04 (or rustlang/rust:nightly).
     "
     exit
 fi
diff --git a/ci/run.sh b/ci/run.sh
index 27b9686ea..bc94d42fe 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -41,7 +41,10 @@ else
     "${test_builtins[@]}" --benches
     "${test_builtins[@]}" --benches --release
 
-    if [ "${TEST_VERBATIM:-}" = "1" ]; then
+    # Validate that having a verbatim path for the target directory works
+    # (trivial to regress using `/` in paths to build artifacts rather than
+    # `Path::join`). MinGW does not currently support these paths.
+    if [[ "$target" = *"windows"* ]] && [[ "$target" != *"gnu"* ]]; then
         verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2)
         "${test_builtins[@]}" --target-dir "$verb_path" --features c
     fi
@@ -54,29 +57,26 @@ symcheck=(cargo run -p symbol-check --release)
 [[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
 symcheck+=(-- build-and-check)
 
-"${symcheck[@]}" -p compiler_builtins --target "$target"
-"${symcheck[@]}" -p compiler_builtins --target "$target" --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins
+"${symcheck[@]}" "$target" -- -p compiler_builtins --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features c
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features c --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 --release
 
 run_intrinsics_test() {
-    args=(
-        --target "$target" --verbose \
-        --manifest-path builtins-test-intrinsics/Cargo.toml
-    )
-    args+=( "$@" )
+    build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml)
+    build_args+=("$@")
 
     # symcheck also checks the results of builtins-test-intrinsics
-    "${symcheck[@]}" "${args[@]}"
+    "${symcheck[@]}" "$target" -- "${build_args[@]}"
 
     # FIXME: we get access violations on Windows, our entrypoint may need to
     # be tweaked.
     if [ "${BUILD_ONLY:-}" != "1" ] && ! [[ "$target" = *"windows"* ]]; then
-        cargo run "${args[@]}"
+        cargo run --target "$target" "${build_args[@]}"
     fi
 }
 
@@ -164,7 +164,7 @@ else
     mflags+=(--workspace --target "$target")
     cmd=(cargo test "${mflags[@]}")
     profile_flag="--profile"
-    
+
     # If nextest is available, use that
     command -v cargo-nextest && nextest=1 || nextest=0
     if [ "$nextest" = "1" ]; then
@@ -207,7 +207,7 @@ else
     "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches
 
     # Ensure that the routines do not panic.
-    # 
+    #
     # `--tests` must be passed because no-panic is only enabled as a dev
     # dependency. The `release-opt` profile must be used to enable LTO and a
     # single CGU.
diff --git a/ci/update-musl.sh b/ci/update-musl.sh
new file mode 100755
index 000000000..637ab1394
--- /dev/null
+++ b/ci/update-musl.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Clone musl and check out the pinned commit used by `musl-math-sys`
+
+set -eux
+
+url=https://github.com/kraj/musl.git
+ref=c47ad25ea3b484e10326f933e927c0bc8cded3da
+dst=crates/musl-math-sys/musl
+
+if ! [ -d "$dst" ]; then
+    git clone "$url" "$dst" --single-branch --depth=1000
+fi
+
+git -C "$dst" fetch "$url" --depth=1
+git -C "$dst" checkout "$ref"
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 8ceef286f..8bbe136ce 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,31 +1,32 @@
+# NOTE: Must be kept in sync with `../builtins-shim/Cargo.toml`.
+#
+# This manifest is actually used in-tree by rust-lang/rust;
+# `../builtins-shim/Cargo.toml` is used by out-of-tree testing. See the other
+# manifest for further details.
+
 [package]
-authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
 version = "0.1.160"
-license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
-readme = "README.md"
+authors = ["Jorge Aparicio <japaricious@gmail.com>"]
+description = "Compiler intrinsics used by the Rust compiler."
 repository = "https://github.com/rust-lang/compiler-builtins"
-homepage = "https://github.com/rust-lang/compiler-builtins"
-documentation = "https://docs.rs/compiler_builtins"
+license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 edition = "2024"
-description = "Compiler intrinsics used by the Rust compiler."
+publish = false
 links = "compiler-rt"
 
 [lib]
 bench = false
 doctest = false
 test = false
+# make sure this crate isn't included in public standard library docs
+doc = false
 
 [dependencies]
-# For more information on this dependency see
-# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core
-core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
+core = { path = "../../core", optional = true }
 
 [build-dependencies]
-cc = { optional = true, version = "1.0" }
-
-[dev-dependencies]
-panic-handler = { path = "../crates/panic-handler" }
+cc = { optional = true, version = "1.2" }
 
 [features]
 default = ["compiler-builtins"]
@@ -34,8 +35,9 @@ default = ["compiler-builtins"]
 # implementations and also filling in unimplemented intrinsics
 c = ["dep:cc"]
 
-# Workaround for the Cranelift codegen backend. Disables any implementations
-# which use inline assembly and fall back to pure Rust versions (if available).
+# For implementations where there is both a generic version and a platform-
+# specific version, use the generic version. This is meant to enable testing
+# the generic versions on all platforms.
 no-asm = []
 
 # Workaround for codegen backends which haven't yet implemented `f16` and
@@ -58,7 +60,3 @@ rustc-dep-of-std = ["compiler-builtins", "dep:core"]
 # This makes certain traits and function specializations public that
 # are not normally public but are required by the `builtins-test`
 unstable-public-internals = []
-
-[lints.rust]
-# The cygwin config can be dropped after our benchmark toolchain is bumped
-unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] }
diff --git a/compiler-builtins/LICENSE.txt b/compiler-builtins/LICENSE.txt
deleted file mode 120000
index 4ab43736a..000000000
--- a/compiler-builtins/LICENSE.txt
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE.txt
\ No newline at end of file
diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index d37fdc5df..43b978606 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -1,11 +1,8 @@
 mod configure;
 
-use std::collections::BTreeMap;
 use std::env;
-use std::path::PathBuf;
-use std::sync::atomic::Ordering;
 
-use configure::{Target, configure_aliases, configure_f16_f128};
+use configure::{Target, configure_aliases};
 
 fn main() {
     println!("cargo::rerun-if-changed=build.rs");
@@ -15,13 +12,15 @@ fn main() {
     let cwd = env::current_dir().unwrap();
 
     configure_check_cfg();
-    configure_f16_f128(&target);
     configure_aliases(&target);
 
     configure_libm(&target);
 
     println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display());
 
+    println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
+    println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
+
     // Emscripten's runtime includes all the builtins
     if target.os == "emscripten" {
         return;
@@ -47,7 +46,6 @@ fn main() {
     }
 
     // These targets have hardware unaligned access support.
-    println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
     if target.arch.contains("x86_64")
         || target.arch.contains("x86")
         || target.arch.contains("aarch64")
@@ -78,17 +76,12 @@ fn main() {
     // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
     // includes the old androideabi. It is deprecated but it is available as a
     // rustc target (arm-linux-androideabi).
-    println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
     if llvm_target[0] == "armv4t"
         || llvm_target[0] == "armv5te"
         || target.triple == "arm-linux-androideabi"
     {
         println!("cargo:rustc-cfg=kernel_user_helpers")
     }
-
-    if llvm_target[0].starts_with("aarch64") {
-        generate_aarch64_outlined_atomics();
-    }
 }
 
 /// Run configuration for `libm` since it is included directly.
@@ -113,13 +106,6 @@ fn configure_libm(target: &Target) {
         println!("cargo:rustc-cfg=optimizations_enabled");
     }
 
-    // Config shorthands
-    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
-    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo:rustc-cfg=x86_no_sse");
-    }
-
     println!(
         "cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
         target.cargo_features
@@ -131,61 +117,6 @@ fn configure_libm(target: &Target) {
     println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\"");
 }
 
-fn aarch64_symbol(ordering: Ordering) -> &'static str {
-    match ordering {
-        Ordering::Relaxed => "relax",
-        Ordering::Acquire => "acq",
-        Ordering::Release => "rel",
-        Ordering::AcqRel => "acq_rel",
-        _ => panic!("unknown symbol for {ordering:?}"),
-    }
-}
-
-/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
-/// Define them from the build script instead.
-/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
-fn generate_aarch64_outlined_atomics() {
-    use std::fmt::Write;
-    // #[macro_export] so that we can use this in tests
-    let gen_macro =
-        |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");
-
-    // Generate different macros for add/clr/eor/set so that we can test them separately.
-    let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"];
-    let mut macros = BTreeMap::new();
-    for sym in sym_names {
-        macros.insert(sym, gen_macro(sym));
-    }
-
-    // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
-    let mut cas16 = gen_macro("cas16");
-
-    for ordering in [
-        Ordering::Relaxed,
-        Ordering::Acquire,
-        Ordering::Release,
-        Ordering::AcqRel,
-    ] {
-        let sym_ordering = aarch64_symbol(ordering);
-        for size in [1, 2, 4, 8] {
-            for (sym, macro_) in &mut macros {
-                let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
-                writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
-            }
-        }
-        let name = format!("__aarch64_cas16_{sym_ordering}");
-        writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
-    }
-
-    let mut buf = String::new();
-    for macro_def in macros.values().chain(std::iter::once(&cas16)) {
-        buf += macro_def;
-        buf += "}; }\n";
-    }
-    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
-    std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap();
-}
-
 /// Emit directives for features we expect to support that aren't in `Cargo.toml`.
 ///
 /// These are mostly cfg elements emitted by this `build.rs`.
diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs
index d825f35a9..79e238abc 100644
--- a/compiler-builtins/configure.rs
+++ b/compiler-builtins/configure.rs
@@ -1,6 +1,6 @@
 // Configuration that is shared between `compiler_builtins` and `builtins_test`.
 
-use std::env;
+use std::{env, str};
 
 #[derive(Debug)]
 #[allow(dead_code)]
@@ -16,6 +16,8 @@ pub struct Target {
     pub pointer_width: u8,
     pub little_endian: bool,
     pub features: Vec<String>,
+    pub reliable_f128: bool,
+    pub reliable_f16: bool,
 }
 
 impl Target {
@@ -51,6 +53,10 @@ impl Target {
                 .split(",")
                 .map(ToOwned::to_owned)
                 .collect(),
+            // Note that these are unstable options, so they only show up with the nightly compiler
+            // or with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway).
+            reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(),
+            reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
         }
     }
 
@@ -74,63 +80,31 @@ pub fn configure_aliases(target: &Target) {
     if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" {
         println!("cargo:rustc-cfg=thumb_1")
     }
-}
 
-/// Configure whether or not `f16` and `f128` support should be enabled.
-pub fn configure_f16_f128(target: &Target) {
-    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
-    // that the backend will not crash when using these types and generates code that can be called
-    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
-    // ABI or other bugs.
-    //
-    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
-    // not straightforward.
-    //
-    // Original source of this list:
-    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
-    let f16_enabled = match target.arch.as_str() {
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
-        "s390x" => false,
-        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
-        "csky" => false,
-        "hexagon" => false,
-        "powerpc" | "powerpc64" => false,
-        "sparc" | "sparc64" => false,
-        "wasm32" | "wasm64" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
+    // Config shorthands
+    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
+    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
+        // Shorthand to detect i586 targets
+        println!("cargo:rustc-cfg=x86_no_sse");
+    }
 
-    let f128_enabled = match target.arch.as_str() {
-        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
-        "amdgpu" => false,
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // FIXME(llvm20): fixed by <https://github.com/llvm/llvm-project/pull/117525>
-        "mips64" | "mips64r6" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
-        "nvptx64" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
-        "powerpc64" if &target.os == "aix" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
-        "sparc" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
+    /* Not all backends support `f16` and `f128` to the same level on all architectures, so we
+     * need to disable things if the compiler may crash. See configuration at:
+     * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432
+     * * https://github.com/rust-lang/rustc_codegen_gcc/blob/4b5c44b14166083eef8d71f15f5ea1f53fc976a0/src/lib.rs#L496-L507
+     * * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
+     */
 
-    // If the feature is set, disable these types.
-    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+    // If the feature is set, disable both of these types.
+    let no_f16_f128 = target.cargo_features.iter().any(|s| s == "no-f16-f128");
 
     println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
-    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
-
-    if f16_enabled && !disable_both {
+    if target.reliable_f16 && !no_f16_f128 {
         println!("cargo::rustc-cfg=f16_enabled");
     }
 
-    if f128_enabled && !disable_both {
+    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
+    if target.reliable_f128 && !no_f16_f128 {
         println!("cargo::rustc-cfg=f128_enabled");
     }
 }
diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs
index 80392187c..039fab206 100644
--- a/compiler-builtins/src/aarch64.rs
+++ b/compiler-builtins/src/aarch64.rs
@@ -4,8 +4,8 @@ use core::intrinsics;
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(target_os = "uefi", not(feature = "no-asm")))]
-    pub unsafe extern "C" fn __chkstk() {
+    #[cfg(target_os = "uefi")]
+    pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
             ".p2align 2",
             "lsl    x16, x15, #4",
diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs
index e238d0237..38fcab152 100644
--- a/compiler-builtins/src/aarch64_linux.rs
+++ b/compiler-builtins/src/aarch64_linux.rs
@@ -4,7 +4,7 @@
 //! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
 //! where atomic operations call into the compiler runtime to dispatch between two depending on
 //! which is supported on the current CPU.
-//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion.
+//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
 //!
 //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
 //! Use the `compiler-rt` intrinsics if you want LSE support.
@@ -262,8 +262,78 @@ macro_rules! or {
     };
 }
 
-// See `generate_aarch64_outlined_atomics` in build.rs.
-include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
+#[macro_export]
+macro_rules! foreach_ordering {
+    ($macro:path, $bytes:tt, $name:ident) => {
+        $macro!( Relaxed, $bytes, ${concat($name, _relax)} );
+        $macro!( Acquire, $bytes, ${concat($name, _acq)} );
+        $macro!( Release, $bytes, ${concat($name, _rel)} );
+        $macro!( AcqRel, $bytes, ${concat($name, _acq_rel)} );
+    };
+    ($macro:path, $name:ident) => {
+        $macro!( Relaxed, ${concat($name, _relax)} );
+        $macro!( Acquire, ${concat($name, _acq)} );
+        $macro!( Release, ${concat($name, _rel)} );
+        $macro!( AcqRel, ${concat($name, _acq_rel)} );
+    };
+}
+
+#[macro_export]
+macro_rules! foreach_bytes {
+    ($macro:path, $name:ident) => {
+        foreach_ordering!( $macro, 1, ${concat(__aarch64_, $name, "1")} );
+        foreach_ordering!( $macro, 2, ${concat(__aarch64_, $name, "2")} );
+        foreach_ordering!( $macro, 4, ${concat(__aarch64_, $name, "4")} );
+        foreach_ordering!( $macro, 8, ${concat(__aarch64_, $name, "8")} );
+    };
+}
+
+/// Generate different macros for cas/swp/add/clr/eor/set so that we can test them separately.
+#[macro_export]
+macro_rules! foreach_cas {
+    ($macro:path) => {
+        foreach_bytes!($macro, cas);
+    };
+}
+
+/// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
+#[macro_export]
+macro_rules! foreach_cas16 {
+    ($macro:path) => {
+        foreach_ordering!($macro, __aarch64_cas16);
+    };
+}
+#[macro_export]
+macro_rules! foreach_swp {
+    ($macro:path) => {
+        foreach_bytes!($macro, swp);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldadd {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldadd);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldclr {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldclr);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldeor {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldeor);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldset {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldset);
+    };
+}
+
 foreach_cas!(compare_and_swap);
 foreach_cas16!(compare_and_swap_i128);
 foreach_swp!(swap);
diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index a7d84e49b..0c15b37df 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -1,5 +1,3 @@
-#![cfg(not(feature = "no-asm"))]
-
 // Interfaces used by naked trampolines.
 // SAFETY: these are defined in compiler-builtins
 unsafe extern "C" {
@@ -9,11 +7,10 @@ unsafe extern "C" {
 }
 
 // SAFETY: these are defined in compiler-builtins
-// FIXME(extern_custom), this isn't always the correct ABI
-unsafe extern "aapcs" {
+unsafe extern "custom" {
     // AAPCS is not always the correct ABI for these intrinsics, but we only use this to
     // forward another `__aeabi_` call so it doesn't matter.
-    fn __aeabi_idiv(a: i32, b: i32) -> i32;
+    fn __aeabi_idiv();
 }
 
 intrinsics! {
@@ -21,7 +18,7 @@ intrinsics! {
     // custom calling convention which can't be implemented using a normal Rust function.
     #[unsafe(naked)]
     #[cfg(not(target_env = "msvc"))]
-    pub unsafe extern "C" fn __aeabi_uidivmod() {
+    pub unsafe extern "custom" fn __aeabi_uidivmod() {
         core::arch::naked_asm!(
             "push {{lr}}",
             "sub sp, sp, #4",
@@ -35,7 +32,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_uldivmod() {
+    pub unsafe extern "custom" fn __aeabi_uldivmod() {
         core::arch::naked_asm!(
             "push {{r4, lr}}",
             "sub sp, sp, #16",
@@ -51,7 +48,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_idivmod() {
+    pub unsafe extern "custom" fn __aeabi_idivmod() {
         core::arch::naked_asm!(
             "push {{r0, r1, r4, lr}}",
             "bl {trampoline}",
@@ -64,7 +61,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_ldivmod() {
+    pub unsafe extern "custom" fn __aeabi_ldivmod() {
         core::arch::naked_asm!(
             "push {{r4, lr}}",
             "sub sp, sp, #16",
@@ -135,8 +132,8 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
     pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
-        debug_assert!(src.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
+        debug_assert!(src.addr().is_multiple_of(8));
 
         // SAFETY: memcpy preconditions apply, less strict alignment.
         unsafe { __aeabi_memcpy4(dst, src, n) };
@@ -161,8 +158,8 @@ intrinsics! {
     /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 3 == 0);
-        debug_assert!(src.addr() & 3 == 0);
+        debug_assert!(dst.addr().is_multiple_of(4));
+        debug_assert!(src.addr().is_multiple_of(4));
 
         // SAFETY: same preconditions, less strict aligment.
         unsafe { __aeabi_memmove(dst, src, n) };
@@ -176,8 +173,8 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
-        debug_assert!(src.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
+        debug_assert!(src.addr().is_multiple_of(8));
 
         // SAFETY: memmove preconditions apply, less strict alignment.
         unsafe { __aeabi_memmove(dst, src, n) };
@@ -236,7 +233,7 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
     pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) {
-        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
 
         // SAFETY: memset preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, c) };
@@ -261,7 +258,7 @@ intrinsics! {
     /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) {
-        debug_assert!(dst.addr() & 3 == 0);
+        debug_assert!(dst.addr().is_multiple_of(4));
 
         // SAFETY: memclr preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, 0) };
@@ -275,7 +272,7 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
 
         // SAFETY: memclr preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, 0) };
diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs
index 6ce67ba71..ab9f86807 100644
--- a/compiler-builtins/src/arm_linux.rs
+++ b/compiler-builtins/src/arm_linux.rs
@@ -4,12 +4,17 @@ use core::{arch, mem};
 // Kernel-provided user-mode helper functions:
 // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
 unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool {
-    let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ());
+    // FIXME(volatile): the third parameter is a volatile pointer
+    // SAFETY: kernel docs specify a known address with the given signature
+    let f = unsafe {
+        mem::transmute::<_, extern "C" fn(u32, u32, *mut u32) -> u32>(0xffff0fc0usize as *const ())
+    };
     f(oldval, newval, ptr) == 0
 }
 
 unsafe fn __kuser_memory_barrier() {
-    let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ());
+    // SAFETY: kernel docs specify a known address with the given signature
+    let f = unsafe { mem::transmute::<_, extern "C" fn()>(0xffff0fa0usize as *const ()) };
     f();
 }
 
@@ -67,8 +72,10 @@ fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
 /// - if `size_of::<T>() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic
 ///   read of 2 bytes.
 /// - if `size_of::<T>() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes.
+// FIXME: assert some of the preconditions in debug mode
 unsafe fn atomic_load_aligned<T>(ptr: *mut u32) -> u32 {
-    if mem::size_of::<T>() == 4 {
+    const { assert!(size_of::<T>() <= 4) };
+    if size_of::<T>() == 4 {
         // SAFETY: As `T` has a size of 4, the caller garantees this is sound.
         unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) }
     } else {
@@ -100,11 +107,13 @@ unsafe fn atomic_rmw<T, F: Fn(u32) -> u32, G: Fn(u32, u32) -> u32>(ptr: *mut T,
     let (shift, mask) = get_shift_mask(ptr);
 
     loop {
-        let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
+        // FIXME(safety): preconditions review needed
+        let curval_aligned = unsafe { atomic_load_aligned::<T>(aligned_ptr) };
         let curval = extract_aligned(curval_aligned, shift, mask);
         let newval = f(curval);
         let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
-        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
+        // FIXME(safety): preconditions review needed
+        if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } {
             return g(curval, newval);
         }
     }
@@ -116,13 +125,15 @@ unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 {
     let (shift, mask) = get_shift_mask(ptr);
 
     loop {
-        let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
+        // FIXME(safety): preconditions review needed
+        let curval_aligned = unsafe { atomic_load_aligned::<T>(aligned_ptr) };
         let curval = extract_aligned(curval_aligned, shift, mask);
         if curval != oldval {
             return curval;
         }
         let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
-        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
+        // FIXME(safety): preconditions review needed
+        if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } {
             return oldval;
         }
     }
@@ -132,7 +143,14 @@ macro_rules! atomic_rmw {
     ($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
         intrinsics! {
             pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
-                atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty
+                // FIXME(safety): preconditions review needed
+                unsafe {
+                    atomic_rmw(
+                        ptr,
+                        |x| $op(x as $ty, val) as u32,
+                        |old, new| $fetch(old, new)
+                    ) as $ty
+                }
             }
         }
     };
@@ -149,7 +167,8 @@ macro_rules! atomic_cmpxchg {
     ($name:ident, $ty:ty) => {
         intrinsics! {
             pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
-                atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty
+                // FIXME(safety): preconditions review needed
+                unsafe { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty }
             }
         }
     };
@@ -285,6 +304,7 @@ atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
 
 intrinsics! {
     pub unsafe extern "C" fn __sync_synchronize() {
-        __kuser_memory_barrier();
+       // SAFETY: preconditions are the same as the calling function.
+       unsafe {  __kuser_memory_barrier() };
     }
 }
diff --git a/compiler-builtins/src/hexagon.rs b/compiler-builtins/src/hexagon.rs
index 91cf91c31..a5c7b4dfd 100644
--- a/compiler-builtins/src/hexagon.rs
+++ b/compiler-builtins/src/hexagon.rs
@@ -1,5 +1,3 @@
-#![cfg(not(feature = "no-asm"))]
-
 use core::arch::global_asm;
 
 global_asm!(include_str!("hexagon/func_macro.s"), options(raw));
diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs
index b9dee63c4..017a81ac9 100644
--- a/compiler-builtins/src/int/udiv.rs
+++ b/compiler-builtins/src/int/udiv.rs
@@ -44,7 +44,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __udivmodqi4() {
+    pub unsafe extern "custom" fn __udivmodqi4() {
         // compute unsigned 8-bit `n / d` and `n % d`.
         //
         // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function.
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index 6a6b28067..ca75f44e0 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -1,13 +1,17 @@
 #![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
 #![cfg_attr(all(target_family = "wasm"), feature(wasm_numeric_instr))]
+#![feature(abi_custom)]
 #![feature(abi_unadjusted)]
 #![feature(asm_experimental_arch)]
 #![feature(cfg_target_has_atomic)]
 #![feature(compiler_builtins)]
 #![feature(core_intrinsics)]
 #![feature(linkage)]
+#![feature(asm_cfg)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
+#![feature(macro_metavar_expr_concat)]
+#![feature(rustc_attrs)]
 #![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![no_builtins]
@@ -56,7 +60,7 @@ pub mod arm;
 #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
 pub mod aarch64;
 
-#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))]
+#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
 pub mod aarch64_linux;
 
 #[cfg(all(
diff --git a/compiler-builtins/src/math/libm_math b/compiler-builtins/src/math/libm_math
deleted file mode 120000
index 4d65313c2..000000000
--- a/compiler-builtins/src/math/libm_math
+++ /dev/null
@@ -1 +0,0 @@
-../../../libm/src/math
\ No newline at end of file
diff --git a/compiler-builtins/src/math/mod.rs b/compiler-builtins/src/math/mod.rs
index 078feb9ff..62d729674 100644
--- a/compiler-builtins/src/math/mod.rs
+++ b/compiler-builtins/src/math/mod.rs
@@ -2,6 +2,7 @@
 #[allow(dead_code)]
 #[allow(unused_imports)]
 #[allow(clippy::all)]
+#[path = "../../../libm/src/math/mod.rs"]
 pub(crate) mod libm_math;
 
 macro_rules! libm_intrinsics {
diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs
index 14a478748..da16dee25 100644
--- a/compiler-builtins/src/mem/impls.rs
+++ b/compiler-builtins/src/mem/impls.rs
@@ -15,6 +15,7 @@
 // this use. Of course this is not a guarantee that such use will work, it just means that this
 // crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if
 // something does go wrong at runtime.
+use core::ffi::c_int;
 use core::intrinsics::likely;
 
 const WORD_SIZE: usize = core::mem::size_of::<usize>();
@@ -384,13 +385,13 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
 }
 
 #[inline(always)]
-pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> c_int {
     let mut i = 0;
     while i < n {
         let a = *s1.wrapping_add(i);
         let b = *s2.wrapping_add(i);
         if a != b {
-            return a as i32 - b as i32;
+            return c_int::from(a) - c_int::from(b);
         }
         i += 1;
     }
diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs
index 6828f3804..a227f60a2 100644
--- a/compiler-builtins/src/mem/mod.rs
+++ b/compiler-builtins/src/mem/mod.rs
@@ -3,13 +3,6 @@
 // FIXME(e2024): this eventually needs to be removed.
 #![allow(unsafe_op_in_unsafe_fn)]
 
-#[allow(warnings)]
-#[cfg(target_pointer_width = "16")]
-type c_int = i16;
-#[allow(warnings)]
-#[cfg(not(target_pointer_width = "16"))]
-type c_int = i32;
-
 // memcpy/memmove/memset have optimized implementations on some architectures
 #[cfg_attr(
     all(not(feature = "no-asm"), target_arch = "x86_64"),
@@ -38,18 +31,18 @@ intrinsics! {
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 {
+    pub unsafe extern "C" fn memset(s: *mut u8, c: core::ffi::c_int, n: usize) -> *mut u8 {
         impls::set_bytes(s, c as u8, n);
         s
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+    pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int {
         impls::compare_bytes(s1, s2, n)
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+    pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int {
         memcmp(s1, s2, n)
     }
 
diff --git a/compiler-builtins/src/mem/x86_64.rs b/compiler-builtins/src/mem/x86_64.rs
index 5cbe83ab1..fb29eb11b 100644
--- a/compiler-builtins/src/mem/x86_64.rs
+++ b/compiler-builtins/src/mem/x86_64.rs
@@ -69,7 +69,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
         "rep movsb",
         "sub $7, %rsi",
         "sub $7, %rdi",
-        "mov {qword_count}, %rcx",
+        "mov {qword_count:r}, %rcx",
         "rep movsq",
         "test {pre_byte_count:e}, {pre_byte_count:e}",
         "add $7, %rsi",
@@ -212,7 +212,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
     let x = {
         let r;
         asm!(
-            "movdqa ({addr}), {dest}",
+            "movdqa ({addr:r}), {dest}",
             addr = in(reg) s,
             dest = out(xmm_reg) r,
             options(att_syntax, nostack),
@@ -232,7 +232,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
         let x = {
             let r;
             asm!(
-                "movdqa ({addr}), {dest}",
+                "movdqa ({addr:r}), {dest}",
                 addr = in(reg) s,
                 dest = out(xmm_reg) r,
                 options(att_syntax, nostack),
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index c9070cf55..9a18216da 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -44,216 +44,84 @@
 #![cfg(not(feature = "mangled-names"))]
 // Windows and Cygwin already has builtins to do this.
 #![cfg(not(any(windows, target_os = "cygwin")))]
-// All these builtins require assembly
-#![cfg(not(feature = "no-asm"))]
 // We only define stack probing for these architectures today.
 #![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 
-// SAFETY: defined in this module.
-// FIXME(extern_custom): the ABI is not correct.
-unsafe extern "C" {
-    pub fn __rust_probestack();
-}
-
-// A wrapper for our implementation of __rust_probestack, which allows us to
-// keep the assembly inline while controlling all CFI directives in the assembly
-// emitted for the function.
-//
-// This is the ELF version.
-#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .pushsection .text.__rust_probestack
-            .globl __rust_probestack
-            .type  __rust_probestack, @function
-            .hidden __rust_probestack
-        __rust_probestack:
-            ",
-            $body,
-            "
-            .size __rust_probestack, . - __rust_probestack
-            .popsection
-            "
-        )
-    };
-}
-
-#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl __rust_probestack
-        __rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
-// Same as above, but for Mach-O. Note that the triple underscore
-// is deliberate
-#[cfg(target_vendor = "apple")]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl ___rust_probestack
-        ___rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
-// In UEFI x86 arch, triple underscore is deliberate.
-#[cfg(all(target_os = "uefi", target_arch = "x86"))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl ___rust_probestack
-        ___rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
 // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
 // ensuring that if any pages are unmapped we'll make a page fault.
 //
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
-//
-// Any changes to this function should be replicated to the SGX version below.
-#[cfg(all(
-    target_arch = "x86_64",
-    not(all(target_env = "sgx", target_vendor = "fortanix"))
-))]
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[cfg(target_arch = "x86_64")]
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "custom" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
-    .cfi_startproc
-    pushq  %rbp
-    .cfi_adjust_cfa_offset 8
-    .cfi_offset %rbp, -16
-    movq   %rsp, %rbp
-    .cfi_def_cfa_register %rbp
-
-    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-    // Main loop, taken in one page increments. We're decrementing rsp by
-    // a page each time until there's less than a page remaining. We're
-    // guaranteed that this function isn't called unless there's more than a
-    // page needed.
-    //
-    // Note that we're also testing against `8(%rsp)` to account for the 8
-    // bytes pushed on the stack orginally with our return address. Using
-    // `8(%rsp)` simulates us testing the stack pointer in the caller's
-    // context.
-
-    // It's usually called when %rax >= 0x1000, but that's not always true.
-    // Dynamic stack allocation, which is needed to implement unsized
-    // rvalues, triggers stackprobe even if %rax < 0x1000.
-    // Thus we have to check %r11 first to avoid segfault.
-    cmp    $0x1000,%r11
-    jna    3f
-2:
-    sub    $0x1000,%rsp
-    test   %rsp,8(%rsp)
-    sub    $0x1000,%r11
-    cmp    $0x1000,%r11
-    ja     2b
-
-3:
-    // Finish up the last remaining stack space requested, getting the last
-    // bits out of r11
-    sub    %r11,%rsp
-    test   %rsp,8(%rsp)
-
-    // Restore the stack pointer to what it previously was when entering
-    // this function. The caller will readjust the stack pointer after we
-    // return.
-    add    %rax,%rsp
-
-    leave
-    .cfi_def_cfa_register %rsp
-    .cfi_adjust_cfa_offset -8
-    ret
-    .cfi_endproc
+            .cfi_startproc
+            pushq  %rbp
+            .cfi_adjust_cfa_offset 8
+            .cfi_offset %rbp, -16
+            movq   %rsp, %rbp
+            .cfi_def_cfa_register %rbp
+
+            mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+            // Main loop, taken in one page increments. We're decrementing rsp by
+            // a page each time until there's less than a page remaining. We're
+            // guaranteed that this function isn't called unless there's more than a
+            // page needed.
+            //
+            // Note that we're also testing against `8(%rsp)` to account for the 8
+            // bytes pushed on the stack originally with our return address. Using
+            // `8(%rsp)` simulates us testing the stack pointer in the caller's
+            // context.
+
+            // It's usually called when %rax >= 0x1000, but that's not always true.
+            // Dynamic stack allocation, which is needed to implement unsized
+            // rvalues, triggers stackprobe even if %rax < 0x1000.
+            // Thus we have to check %r11 first to avoid segfault.
+            cmp    $0x1000,%r11
+            jna    3f
+        2:
+            sub    $0x1000,%rsp
+            test   %rsp,8(%rsp)
+            sub    $0x1000,%r11
+            cmp    $0x1000,%r11
+            ja     2b
+
+        3:
+            // Finish up the last remaining stack space requested, getting the last
+            // bits out of r11
+            sub    %r11,%rsp
+            test   %rsp,8(%rsp)
+
+            // Restore the stack pointer to what it previously was when entering
+            // this function. The caller will readjust the stack pointer after we
+            // return.
+            add    %rax,%rsp
+
+            leave
+            .cfi_def_cfa_register %rsp
+            .cfi_adjust_cfa_offset -8
+    ",
+    #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))]
+    "       ret",
+    #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))]
     "
-    ),
-    options(att_syntax)
-);
-
-// This function is the same as above, except that some instructions are
-// [manually patched for LVI].
-//
-// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
-#[cfg(all(
-    target_arch = "x86_64",
-    all(target_env = "sgx", target_vendor = "fortanix")
-))]
-core::arch::global_asm!(
-    define_rust_probestack!(
-        "
-    .cfi_startproc
-    pushq  %rbp
-    .cfi_adjust_cfa_offset 8
-    .cfi_offset %rbp, -16
-    movq   %rsp, %rbp
-    .cfi_def_cfa_register %rbp
-
-    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-    // Main loop, taken in one page increments. We're decrementing rsp by
-    // a page each time until there's less than a page remaining. We're
-    // guaranteed that this function isn't called unless there's more than a
-    // page needed.
-    //
-    // Note that we're also testing against `8(%rsp)` to account for the 8
-    // bytes pushed on the stack orginally with our return address. Using
-    // `8(%rsp)` simulates us testing the stack pointer in the caller's
-    // context.
-
-    // It's usually called when %rax >= 0x1000, but that's not always true.
-    // Dynamic stack allocation, which is needed to implement unsized
-    // rvalues, triggers stackprobe even if %rax < 0x1000.
-    // Thus we have to check %r11 first to avoid segfault.
-    cmp    $0x1000,%r11
-    jna    3f
-2:
-    sub    $0x1000,%rsp
-    test   %rsp,8(%rsp)
-    sub    $0x1000,%r11
-    cmp    $0x1000,%r11
-    ja     2b
-
-3:
-    // Finish up the last remaining stack space requested, getting the last
-    // bits out of r11
-    sub    %r11,%rsp
-    test   %rsp,8(%rsp)
-
-    // Restore the stack pointer to what it previously was when entering
-    // this function. The caller will readjust the stack pointer after we
-    // return.
-    add    %rax,%rsp
-
-    leave
-    .cfi_def_cfa_register %rsp
-    .cfi_adjust_cfa_offset -8
-    pop %r11
-    lfence
-    jmp *%r11
-    .cfi_endproc
+            // for this target, [manually patch for LVI].
+            //
+            // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+            pop %r11
+            lfence
+            jmp *%r11
+    ",
     "
-    ),
-    options(att_syntax)
-);
+            .cfi_endproc
+    ",
+        options(att_syntax)
+    )
+}
 
 #[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
 // This is the same as x86_64 above, only translated for 32-bit sizes. Note
@@ -261,42 +129,44 @@ core::arch::global_asm!(
 // function basically can't tamper with anything.
 //
 // The ABI here is the same as x86_64, except everything is 32-bits large.
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "custom" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
-    .cfi_startproc
-    push   %ebp
-    .cfi_adjust_cfa_offset 4
-    .cfi_offset %ebp, -8
-    mov    %esp, %ebp
-    .cfi_def_cfa_register %ebp
-    push   %ecx
-    mov    %eax,%ecx
-
-    cmp    $0x1000,%ecx
-    jna    3f
-2:
-    sub    $0x1000,%esp
-    test   %esp,8(%esp)
-    sub    $0x1000,%ecx
-    cmp    $0x1000,%ecx
-    ja     2b
-
-3:
-    sub    %ecx,%esp
-    test   %esp,8(%esp)
-
-    add    %eax,%esp
-    pop    %ecx
-    leave
-    .cfi_def_cfa_register %esp
-    .cfi_adjust_cfa_offset -4
-    ret
-    .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+            .cfi_startproc
+            push   %ebp
+            .cfi_adjust_cfa_offset 4
+            .cfi_offset %ebp, -8
+            mov    %esp, %ebp
+            .cfi_def_cfa_register %ebp
+            push   %ecx
+            mov    %eax,%ecx
+
+            cmp    $0x1000,%ecx
+            jna    3f
+        2:
+            sub    $0x1000,%esp
+            test   %esp,8(%esp)
+            sub    $0x1000,%ecx
+            cmp    $0x1000,%ecx
+            ja     2b
+
+        3:
+            sub    %ecx,%esp
+            test   %esp,8(%esp)
+
+            add    %eax,%esp
+            pop    %ecx
+            leave
+            .cfi_def_cfa_register %esp
+            .cfi_adjust_cfa_offset -4
+            ret
+            .cfi_endproc
+    ",
+        options(att_syntax)
+    )
+}
 
 #[cfg(all(target_arch = "x86", target_os = "uefi"))]
 // UEFI target is windows like target. LLVM will do _chkstk things like windows.
@@ -309,44 +179,46 @@ core::arch::global_asm!(
 //   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
 //   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
 //   themselves.
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "custom" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
-    .cfi_startproc
-    push   %ebp
-    .cfi_adjust_cfa_offset 4
-    .cfi_offset %ebp, -8
-    mov    %esp, %ebp
-    .cfi_def_cfa_register %ebp
-    push   %ecx
-    push   %edx
-    mov    %eax,%ecx
-
-    cmp    $0x1000,%ecx
-    jna    3f
-2:
-    sub    $0x1000,%esp
-    test   %esp,8(%esp)
-    sub    $0x1000,%ecx
-    cmp    $0x1000,%ecx
-    ja     2b
-
-3:
-    sub    %ecx,%esp
-    test   %esp,8(%esp)
-    mov    4(%ebp),%edx
-    mov    %edx, 12(%esp)
-    add    %eax,%esp
-    pop    %edx
-    pop    %ecx
-    leave
-
-    sub   %eax, %esp
-    .cfi_def_cfa_register %esp
-    .cfi_adjust_cfa_offset -4
-    ret
-    .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+            .cfi_startproc
+            push   %ebp
+            .cfi_adjust_cfa_offset 4
+            .cfi_offset %ebp, -8
+            mov    %esp, %ebp
+            .cfi_def_cfa_register %ebp
+            push   %ecx
+            push   %edx
+            mov    %eax,%ecx
+
+            cmp    $0x1000,%ecx
+            jna    3f
+        2:
+            sub    $0x1000,%esp
+            test   %esp,8(%esp)
+            sub    $0x1000,%ecx
+            cmp    $0x1000,%ecx
+            ja     2b
+
+        3:
+            sub    %ecx,%esp
+            test   %esp,8(%esp)
+            mov    4(%ebp),%edx
+            mov    %edx, 12(%esp)
+            add    %eax,%esp
+            pop    %edx
+            pop    %ecx
+            leave
+
+            sub   %eax, %esp
+            .cfi_def_cfa_register %esp
+            .cfi_adjust_cfa_offset -4
+            ret
+            .cfi_endproc
+    ",
+        options(att_syntax)
+    )
+}
diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs
index 01152d9c7..51940b3b3 100644
--- a/compiler-builtins/src/x86.rs
+++ b/compiler-builtins/src/x86.rs
@@ -2,30 +2,24 @@
 
 use core::intrinsics;
 
-// NOTE These functions are implemented using assembly because they using a custom
+// NOTE These functions are implemented using assembly because they use a custom
 // calling convention which can't be implemented using a normal Rust function
 
 // NOTE These functions are never mangled as they are not tested against compiler-rt
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(
-        any(all(windows, target_env = "gnu"), target_os = "uefi"),
-        not(feature = "no-asm")
-    ))]
-    pub unsafe extern "C" fn __chkstk() {
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))]
+    pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
-            "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable"
-            options(att_syntax)
+            "jmp {}", // Jump to __alloca since fallthrough may be unreliable"
+            sym crate::x86::_alloca::_alloca,
         );
     }
 
     #[unsafe(naked)]
-    #[cfg(all(
-        any(all(windows, target_env = "gnu"), target_os = "uefi"),
-        not(feature = "no-asm")
-    ))]
-    pub unsafe extern "C" fn _alloca() {
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))]
+    pub unsafe extern "custom" fn _alloca() {
         // __chkstk and _alloca are the same function
         core::arch::naked_asm!(
             "push   %ecx",
diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs
index fc1190f79..f9ae784d5 100644
--- a/compiler-builtins/src/x86_64.rs
+++ b/compiler-builtins/src/x86_64.rs
@@ -2,22 +2,15 @@
 
 use core::intrinsics;
 
-// NOTE These functions are implemented using assembly because they using a custom
+// NOTE These functions are implemented using assembly because they use a custom
 // calling convention which can't be implemented using a normal Rust function
 
 // NOTE These functions are never mangled as they are not tested against compiler-rt
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(
-        any(
-            all(windows, target_env = "gnu"),
-            target_os = "cygwin",
-            target_os = "uefi"
-        ),
-        not(feature = "no-asm")
-    ))]
-    pub unsafe extern "C" fn ___chkstk_ms() {
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin", target_os = "uefi"))]
+    pub unsafe extern "custom" fn ___chkstk_ms() {
         core::arch::naked_asm!(
             "push   %rcx",
             "push   %rax",
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 3929854f0..100a8d0ec 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -10,9 +10,9 @@ proc-macro = true
 
 [dependencies]
 heck = "0.5.0"
-proc-macro2 = "1.0.94"
+proc-macro2 = "1.0.95"
 quote = "1.0.40"
-syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] }
+syn = { version = "2.0.104", features = ["full", "extra-traits", "visit-mut"] }
 
 [lints.rust]
 # Values used during testing
diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 482da974c..7efa1488f 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,5 +1,3 @@
-#![feature(let_chains)]
-
 mod enums;
 mod parse;
 mod shared;
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index d3fb147e5..39f6fa906 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -11,4 +11,4 @@ license = "MIT OR Apache-2.0"
 libm = { path = "../../libm" }
 
 [build-dependencies]
-cc = "1.2.16"
+cc = "1.2.29"
diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index b00dbc73e..59e42f2d2 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -120,7 +120,7 @@ fn build_musl_math(cfg: &Config) {
     let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch);
     assert!(
         math.exists(),
-        "musl source not found. Is the submodule up to date?"
+        "musl source not found. You may need to run `./ci/update-musl.sh`."
     );
 
     let source_map = find_math_source(&math, cfg);
diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
deleted file mode 160000
index c47ad25ea..000000000
--- a/crates/musl-math-sys/musl
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index 6a4bf4859..9cab8deef 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -40,8 +40,6 @@ macro_rules! functions {
     ) => {
         // Run a simple check to ensure we can link and call the function without crashing.
         #[test]
-        // FIXME(#309): LE PPC crashes calling some musl functions
-        #[cfg_attr(all(target_arch = "powerpc64", target_endian = "little"), ignore)]
         fn $name() {
             <fn($($aty),+) -> $rty>::check(super::$name);
         }
diff --git a/crates/panic-handler/src/lib.rs b/crates/panic-handler/src/lib.rs
index 673e00522..f4d7c8397 100644
--- a/crates/panic-handler/src/lib.rs
+++ b/crates/panic-handler/src/lib.rs
@@ -1,11 +1,8 @@
 //! This is needed for tests on targets that require a `#[panic_handler]` function
 
-#![feature(no_core)]
-#![no_core]
-
-extern crate core;
+#![no_std]
 
 #[panic_handler]
-fn panic(_: &core::panic::PanicInfo) -> ! {
+fn panic(_: &core::panic::PanicInfo<'_>) -> ! {
     loop {}
 }
diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml
index 30969ee40..e2218b491 100644
--- a/crates/symbol-check/Cargo.toml
+++ b/crates/symbol-check/Cargo.toml
@@ -5,8 +5,7 @@ edition = "2024"
 publish = false
 
 [dependencies]
-# FIXME: used as a git dependency since the latest release does not support wasm
-object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
+object = "0.37.1"
 serde_json = "1.0.140"
 
 [features]
diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index d83cd318d..1312a7179 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -8,7 +8,9 @@ use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 
 use object::read::archive::{ArchiveFile, ArchiveMember};
-use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
+use object::{
+    File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection,
+};
 use serde_json::Value;
 
 const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
@@ -16,10 +18,12 @@ const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe")
 
 const USAGE: &str = "Usage:
 
-    symbol-check build-and-check CARGO_ARGS ...
+    symbol-check build-and-check [TARGET] -- CARGO_BUILD_ARGS ...
 
-Cargo will get invoked with `CARGO_ARGS` and all output
+Cargo will get invoked with `CARGO_BUILD_ARGS` and the specified target. All output
 `compiler_builtins*.rlib` files will be checked.
+
+If TARGET is not specified, the host target is used.
 ";
 
 fn main() {
@@ -28,13 +32,13 @@ fn main() {
     let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
 
     match &args_ref[1..] {
-        ["build-and-check", rest @ ..] if !rest.is_empty() => {
-            let paths = exec_cargo_with_args(rest);
-            for path in paths {
-                println!("Checking {}", path.display());
-                verify_no_duplicates(&path);
-                verify_core_symbols(&path);
-            }
+        ["build-and-check", target, "--", args @ ..] if !args.is_empty() => {
+            check_cargo_args(args);
+            run_build_and_check(target, args);
+        }
+        ["build-and-check", "--", args @ ..] if !args.is_empty() => {
+            check_cargo_args(args);
+            run_build_and_check(&host_target(), args);
         }
         _ => {
             println!("{USAGE}");
@@ -43,14 +47,54 @@ fn main() {
     }
 }
 
+/// Make sure `--target` isn't passed to avoid confusion (since it should be provided only once,
+/// positionally). Flags that merely start with `--target`, such as `--target-dir`, are allowed.
+fn check_cargo_args(args: &[&str]) {
+    for arg in args {
+        assert!(
+            *arg != "--target" && !arg.starts_with("--target="),
+            "target must be passed positionally. {USAGE}"
+        );
+    }
+}
+
+/// Build for `target` with the extra cargo `args`, then check every library that was created.
+fn run_build_and_check(target: &str, args: &[&str]) {
+    for lib in exec_cargo_with_args(target, args) {
+        println!("Checking {}", lib.display());
+        // Read the file once; both checks below borrow the same archive.
+        let archive = Archive::from_path(&lib);
+        verify_no_duplicates(&archive);
+        verify_core_symbols(&archive);
+    }
+}
+
+/// Ask `rustc` for the host target triple (the `host: ` line of its verbose version output).
+fn host_target() -> String {
+    let out = Command::new("rustc")
+        .args(["--version", "--verbose"])
+        .output()
+        .expect("failed to run `rustc --version --verbose`");
+    assert!(out.status.success(), "`rustc --version --verbose` failed");
+    let out = String::from_utf8(out.stdout).expect("rustc output was not valid UTF-8");
+    out.lines()
+        .find_map(|s| s.strip_prefix("host: "))
+        .expect("no `host: ` line found in rustc output")
+        .to_owned()
+}
+
 /// Run `cargo build` with the provided additional arguments, collecting the list of created
 /// libraries.
-fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
+fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec<PathBuf> {
     let mut cmd = Command::new("cargo");
-    cmd.arg("build")
-        .arg("--message-format=json")
-        .args(args)
-        .stdout(Stdio::piped());
+    cmd.args([
+        "build",
+        "--target",
+        target,
+        "--message-format=json-diagnostic-rendered-ansi",
+    ])
+    .args(args)
+    .stdout(Stdio::piped());
 
     println!("running: {cmd:?}");
     let mut child = cmd.spawn().expect("failed to launch Cargo");
@@ -61,11 +105,21 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
 
     for line in reader.lines() {
         let line = line.expect("failed to read line");
-        println!("{line}"); // tee to stdout
-
-        // Select only steps that create files
         let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
-        if j["reason"] != "compiler-artifact" {
+        let reason = &j["reason"];
+
+        // Forward output that is meant to be user-facing
+        if reason == "compiler-message" {
+            println!("{}", j["message"]["rendered"].as_str().unwrap());
+        } else if reason == "build-finished" {
+            println!("build finshed. success: {}", j["success"]);
+        } else if reason == "build-script-executed" {
+            let pretty = serde_json::to_string_pretty(&j).unwrap();
+            println!("build script output: {pretty}",);
+        }
+
+        // Only interested in the artifact list now
+        if reason != "compiler-artifact" {
             continue;
         }
 
@@ -133,12 +187,12 @@ impl SymInfo {
 /// Note that this will also locate cases where a symbol is weakly defined in more than one place.
 /// Technically there are no linker errors that will come from this, but it keeps our binary more
 /// straightforward and saves some distribution size.
-fn verify_no_duplicates(path: &Path) {
+fn verify_no_duplicates(archive: &Archive) {
     let mut syms = BTreeMap::<String, SymInfo>::new();
     let mut dups = Vec::new();
     let mut found_any = false;
 
-    for_each_symbol(path, |symbol, member| {
+    archive.for_each_symbol(|symbol, member| {
         // Only check defined globals
         if !symbol.is_global() || symbol.is_undefined() {
             return;
@@ -185,12 +239,12 @@ fn verify_no_duplicates(path: &Path) {
 }
 
 /// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
-fn verify_core_symbols(path: &Path) {
+fn verify_core_symbols(archive: &Archive) {
     let mut defined = BTreeSet::new();
     let mut undefined = Vec::new();
     let mut has_symbols = false;
 
-    for_each_symbol(path, |symbol, member| {
+    archive.for_each_symbol(|symbol, member| {
         has_symbols = true;
 
         // Find only symbols from `core`
@@ -219,14 +273,40 @@ fn verify_core_symbols(path: &Path) {
     println!("    success: no undefined references to core found");
 }
 
-/// For a given archive path, do something with each symbol.
-fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
-    let data = fs::read(path).expect("reading file failed");
-    let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
-    for member in archive.members() {
-        let member = member.expect("failed to access member");
-        let obj_data = member.data(&*data).expect("failed to access object");
-        let obj = object::File::parse(obj_data).expect("failed to parse object");
-        obj.symbols().for_each(|sym| f(sym, &member));
+/// Owns the raw bytes of an archive so that `object` types can borrow from them.
+struct Archive {
+    data: Vec<u8>,
+}
+
+impl Archive {
+    /// Read the whole file at `path` into memory, panicking if it cannot be read.
+    fn from_path(path: &Path) -> Self {
+        let data = fs::read(path).expect("reading file failed");
+        Self { data }
+    }
+
+    /// Parse the owned bytes as an archive, panicking if parsing fails.
+    fn file(&self) -> ArchiveFile<'_> {
+        ArchiveFile::parse(self.data.as_slice()).expect("archive parse failed")
+    }
+
+    /// Call `f` with each member of the archive parsed as an object file.
+    fn for_each_object(&self, mut f: impl FnMut(ObjFile, &ArchiveMember)) {
+        let archive = self.file();
+        for entry in archive.members() {
+            let entry = entry.expect("failed to access member");
+            let raw = entry.data(self.data.as_slice()).expect("failed to access object");
+            let object = ObjFile::parse(raw).expect("failed to parse object");
+            f(object, &entry);
+        }
+    }
+
+    /// Call `f` with each symbol of each object file in the archive.
+    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+        self.for_each_object(|object, entry| {
+            for symbol in object.symbols() {
+                f(symbol, entry);
+            }
+        });
     }
 }
diff --git a/etc/thumbv7em-none-eabi-renamed.json b/etc/thumbv7em-none-eabi-renamed.json
new file mode 100644
index 000000000..81273d44e
--- /dev/null
+++ b/etc/thumbv7em-none-eabi-renamed.json
@@ -0,0 +1,23 @@
+{
+  "abi": "eabi",
+  "arch": "arm",
+  "c-enum-min-bits": 8,
+  "crt-objects-fallback": "false",
+  "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64",
+  "emit-debug-gdb-scripts": false,
+  "frame-pointer": "always",
+  "linker": "rust-lld",
+  "linker-flavor": "gnu-lld",
+  "llvm-floatabi": "soft",
+  "llvm-target": "thumbv7em-none-eabi",
+  "max-atomic-width": 32,
+  "metadata": {
+    "description": "Bare ARMv7E-M",
+    "host_tools": false,
+    "std": false,
+    "tier": 2
+  },
+  "panic-strategy": "abort",
+  "relocation-model": "static",
+  "target-pointer-width": "32"
+}
diff --git a/josh-sync.toml b/josh-sync.toml
new file mode 100644
index 000000000..599a12af8
--- /dev/null
+++ b/josh-sync.toml
@@ -0,0 +1,3 @@
+org = "rust-lang"
+repo = "compiler-builtins"
+path = "library/compiler-builtins"
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index 7a306e735..0af6b0c1d 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [features]
-default = ["build-mpfr", "build-musl", "unstable-float"]
+default = ["build-mpfr", "unstable-float"]
 
 # Propagated from libm because this affects which functions we test.
 unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
@@ -28,28 +28,28 @@ icount = ["dep:iai-callgrind"]
 short-benchmarks = []
 
 [dependencies]
-anyhow = "1.0.97"
+anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
-gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
-iai-callgrind = { version = "0.14.0", optional = true }
-indicatif = { version = "0.17.11", default-features = false }
+gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
+iai-callgrind = { version = "0.15.2", optional = true }
+indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
 musl-math-sys = { path = "../crates/musl-math-sys", optional = true }
 paste = "1.0.15"
-rand = "0.9.0"
+rand = "0.9.1"
 rand_chacha = "0.9.0"
 rayon = "1.10.0"
 rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
-getrandom = { version = "0.3.2", features = ["wasm_js"] }
+getrandom = { version = "0.3.3", features = ["wasm_js"] }
 
 [build-dependencies]
-rand = { version = "0.9.0", optional = true }
+rand = { version = "0.9.1", optional = true }
 
 [dev-dependencies]
-criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] }
 libtest-mimic = "0.8.1"
 
 [[bench]]
diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index a0928a29f..02ee13f80 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -119,6 +119,22 @@ fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
     }
 }
 
+#[library_benchmark]
+#[bench::linspace(setup_u256_add())]
+fn icount_bench_u256_sub(cases: Vec<(u256, u256)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) - black_box(y));
+    }
+}
+
+#[library_benchmark]
+#[bench::linspace(setup_u256_shift())]
+fn icount_bench_u256_shl(cases: Vec<(u256, u32)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) << black_box(y));
+    }
+}
+
 #[library_benchmark]
 #[bench::linspace(setup_u256_shift())]
 fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
@@ -129,7 +145,7 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 
 library_benchmark_group!(
     name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_shr
+    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
 );
 
 #[library_benchmark]
diff --git a/libm-test/src/generate/edge_cases.rs b/libm-test/src/generate/edge_cases.rs
index 2fb074638..4e4a782a1 100644
--- a/libm-test/src/generate/edge_cases.rs
+++ b/libm-test/src/generate/edge_cases.rs
@@ -51,6 +51,7 @@ where
 
     // Check some special values that aren't included in the above ranges
     values.push(Op::FTy::NAN);
+    values.push(Op::FTy::NEG_NAN);
     values.extend(Op::FTy::consts().iter());
 
     // Check around the maximum subnormal value
diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index f5fb5f670..3fb8c1b37 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
 
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
-        if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Ceil
-            && ctx.basis == CheckBasis::Musl
-            && input.0 < 0.0
-            && input.0 > -1.0
-            && expected == F::ZERO
-            && actual == F::ZERO
-        {
-            // musl returns -0.0, we return +0.0
-            return XFAIL("i586 ceil signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
             && (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL("i586 rint rounding mode");
         }
 
-        if cfg!(x86_no_sse)
-            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
-            && expected.eq_repr(F::NEG_ZERO)
-            && actual.eq_repr(F::ZERO)
-        {
-            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
-            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
-            return XFAIL("i586 ceil/floor signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
         {
@@ -381,7 +359,7 @@ fn unop_common<F1: Float, F2: Float>(
         }
 
         // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
-        if actual.to_bits() == expected.to_bits() {
+        if actual.biteq(expected) {
             return CheckAction::Custom(Ok(()));
         } else {
             return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns")));
@@ -444,13 +422,18 @@ fn binop_common<F1: Float, F2: Float>(
     expected: F2,
     ctx: &CheckCtx,
 ) -> CheckAction {
-    // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. Skip if
-    // the first input (magnitude source) is NaN and the output is also a NaN, or if the second
-    // input (sign source) is NaN.
-    if ctx.basis == CheckBasis::Mpfr
+    // MPFR only has one NaN bitpattern; skip tests in cases where the first argument would take
+    // the sign of a NaN second argument. The default NaN checks cover other cases.
+    if ctx.base_name == BaseName::Copysign && ctx.basis == CheckBasis::Mpfr && input.1.is_nan() {
+        return SKIP;
+    }
+
+    // FIXME(#939): this should not be skipped, there is a bug in our implementation.
+    if ctx.base_name == BaseName::FmaximumNum
+        && ctx.basis == CheckBasis::Mpfr
         && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan())
     {
-        return SKIP;
+        return XFAIL_NOCHECK;
     }
 
     /* FIXME(#439): our fmin and fmax do not compare signed zeros */
diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs
index dbb970161..278274d91 100644
--- a/libm-test/src/test_traits.rs
+++ b/libm-test/src/test_traits.rs
@@ -312,12 +312,9 @@ where
     let mut inner = || -> TestResult {
         let mut allowed_ulp = ctx.ulp;
 
-        // Forbid overrides if the items came from an explicit list, as long as we are checking
-        // against either MPFR or the result itself.
-        let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl;
-
         match SpecialCase::check_float(input, actual, expected, ctx) {
-            _ if require_biteq => (),
+            // Forbid overrides if the items came from an explicit list
+            _ if ctx.gen_kind == GeneratorKind::List => (),
             CheckAction::AssertSuccess => (),
             CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
             CheckAction::Custom(res) => return res,
@@ -327,12 +324,20 @@ where
 
         // Check when both are NaNs
         if actual.is_nan() && expected.is_nan() {
-            if require_biteq && ctx.basis == CheckBasis::None {
-                ensure!(
-                    actual.to_bits() == expected.to_bits(),
-                    "mismatched NaN bitpatterns"
-                );
+            // Don't assert NaN bitwise equality if:
+            //
+            // * Testing against MPFR (there is a single NaN representation)
+            // * Testing against Musl except for explicit tests (Musl does some NaN quieting)
+            //
+            // In these cases, just the check that actual and expected are both NaNs is
+            // sufficient.
+            let skip_nan_biteq = ctx.basis == CheckBasis::Mpfr
+                || (ctx.basis == CheckBasis::Musl && ctx.gen_kind != GeneratorKind::List);
+
+            if !skip_nan_biteq {
+                ensure!(actual.biteq(expected), "mismatched NaN bitpatterns");
             }
+
             // By default, NaNs have nothing special to check.
             return Ok(());
         } else if actual.is_nan() || expected.is_nan() {
diff --git a/libm-test/tests/u256.rs b/libm-test/tests/u256.rs
index 8cbb3ad22..d1c5cfbcc 100644
--- a/libm-test/tests/u256.rs
+++ b/libm-test/tests/u256.rs
@@ -111,12 +111,54 @@ fn mp_u256_add() {
         let y = random_u256(&mut rng);
         assign_bigint(&mut bx, x);
         assign_bigint(&mut by, y);
-        let actual = x + y;
+        let actual = if u256::MAX - x >= y {
+            x + y
+        } else {
+            // otherwise (u256::MAX - x) < y, so the wrapped result is
+            // (x + y) - (u256::MAX + 1) == y - (u256::MAX - x) - 1
+            y - (u256::MAX - x) - 1_u128.widen()
+        };
         bx += &by;
         check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
     }
 }
 
+#[test]
+fn mp_u256_sub() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+
+        // since the operators (may) panic on overflow,
+        // we should test something that doesn't
+        let actual = if x >= y { x - y } else { y - x };
+        bx -= &by;
+        bx.abs_mut();
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_shl() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let shift: u32 = rng.random_range(0..256);
+        assign_bigint(&mut bx, x);
+        let actual = x << shift;
+        bx <<= shift;
+        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+    }
+}
+
 #[test]
 fn mp_u256_shr() {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
@@ -124,7 +166,7 @@ fn mp_u256_shr() {
 
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
-        let shift: u32 = rng.random_range(0..255);
+        let shift: u32 = rng.random_range(0..256);
         assign_bigint(&mut bx, x);
         let actual = x >> shift;
         bx >>= shift;
diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs
index f2ba6a4a0..e04e00c6d 100644
--- a/libm-test/tests/z_extensive/run.rs
+++ b/libm-test/tests/z_extensive/run.rs
@@ -197,15 +197,15 @@ impl Progress {
 
     fn update(&self, completed: u64, input: impl fmt::Debug) {
         // Infrequently update the progress bar.
-        if completed % 20_000 == 0 {
+        if completed.is_multiple_of(20_000) {
             self.pb.set_position(completed);
         }
 
-        if completed % 500_000 == 0 {
+        if completed.is_multiple_of(500_000) {
             self.pb.set_message(format!("input: {input:<24?}"));
         }
 
-        if !self.is_tty && completed % 5_000_000 == 0 {
+        if !self.is_tty && completed.is_multiple_of(5_000_000) {
             let len = self.pb.length().unwrap_or_default();
             eprintln!(
                 "[{elapsed:3?}s {percent:3.0}%] {name} \
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index b6fb5efcf..63b4d3c27 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -1,14 +1,12 @@
 [package]
+name = "libm"
+version = "0.2.15"
 authors = ["Jorge Aparicio <jorge@japaric.io>"]
-categories = ["no-std"]
 description = "libm in pure Rust"
-documentation = "https://docs.rs/libm"
+categories = ["no-std"]
 keywords = ["libm", "math"]
-license = "MIT"
-name = "libm"
-readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
-version = "0.2.15"
+license = "MIT"
 edition = "2021"
 rust-version = "1.63"
 
diff --git a/libm/configure.rs b/libm/configure.rs
index 2a497c7b1..76186e636 100644
--- a/libm/configure.rs
+++ b/libm/configure.rs
@@ -3,12 +3,14 @@
 use std::env;
 use std::path::PathBuf;
 
+#[derive(Debug)]
 #[allow(dead_code)]
 pub struct Config {
     pub manifest_dir: PathBuf,
     pub out_dir: PathBuf,
     pub opt_level: String,
     pub cargo_features: Vec<String>,
+    pub target_triple: String,
     pub target_arch: String,
     pub target_env: String,
     pub target_family: Option<String>,
@@ -16,10 +18,13 @@ pub struct Config {
     pub target_string: String,
     pub target_vendor: String,
     pub target_features: Vec<String>,
+    pub reliable_f128: bool,
+    pub reliable_f16: bool,
 }
 
 impl Config {
     pub fn from_env() -> Self {
+        let target_triple = env::var("TARGET").unwrap();
         let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
@@ -29,6 +34,7 @@ impl Config {
             .collect();
 
         Self {
+            target_triple,
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
             out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
             opt_level: env::var("OPT_LEVEL").unwrap(),
@@ -40,6 +46,10 @@ impl Config {
             target_string: env::var("TARGET").unwrap(),
             target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
             target_features,
+            // Note that these are unstable options, so only show up with the nightly compiler or
+            // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway).
+            reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(),
+            reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
         }
     }
 }
@@ -128,62 +138,18 @@ fn emit_f16_f128_cfg(cfg: &Config) {
         return;
     }
 
-    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
-    // that the backend will not crash when using these types and generates code that can be called
-    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
-    // ABI or other bugs.
-    //
-    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
-    // not straightforward.
-    //
-    // Original source of this list:
-    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
-    let f16_enabled = match cfg.target_arch.as_str() {
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
-        "s390x" => false,
-        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
-        // FIXME(llvm): loongarch fixed by <https://github.com/llvm/llvm-project/pull/107791>
-        "csky" => false,
-        "hexagon" => false,
-        "loongarch64" => false,
-        "mips" | "mips64" | "mips32r6" | "mips64r6" => false,
-        "powerpc" | "powerpc64" => false,
-        "sparc" | "sparc64" => false,
-        "wasm32" | "wasm64" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
-
-    let f128_enabled = match cfg.target_arch.as_str() {
-        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
-        "amdgpu" => false,
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/96432>
-        "mips64" | "mips64r6" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
-        "nvptx64" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
-        "powerpc64" if &cfg.target_os == "aix" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
-        "sparc" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
-
-    // If the feature is set, disable these types.
-    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+    /* See the compiler-builtins configure file for info about the meaning of these options */
 
-    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+    // If the feature is set, disable both of these types.
+    let no_f16_f128 = cfg.cargo_features.iter().any(|s| s == "no-f16-f128");
 
-    if f16_enabled && !disable_both {
+    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
+    if cfg.reliable_f16 && !no_f16_f128 {
         println!("cargo:rustc-cfg=f16_enabled");
     }
 
-    if f128_enabled && !disable_both {
+    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+    if cfg.reliable_f128 && !no_f16_f128 {
         println!("cargo:rustc-cfg=f128_enabled");
     }
 }
diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs
index 23b13251e..89b2e7c5f 100644
--- a/libm/src/math/acos.rs
+++ b/libm/src/math/acos.rs
@@ -59,7 +59,7 @@ fn r(z: f64) -> f64 {
 /// Computes the inverse cosine (arc cosine) of the input value.
 /// Arguments must be in the range -1 to 1.
 /// Returns values in radians, in the range of 0 to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acos(x: f64) -> f64 {
     let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120
     let z: f64;
diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs
index dd88eea5b..d263b3f2c 100644
--- a/libm/src/math/acosf.rs
+++ b/libm/src/math/acosf.rs
@@ -33,7 +33,7 @@ fn r(z: f32) -> f32 {
 /// Computes the inverse cosine (arc cosine) of the input value.
 /// Arguments must be in the range -1 to 1.
 /// Returns values in radians, in the range of 0 to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acosf(x: f32) -> f32 {
     let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs
index d1f5b9fa9..8737bad01 100644
--- a/libm/src/math/acosh.rs
+++ b/libm/src/math/acosh.rs
@@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42,  0xfefa3
 /// Calculates the inverse hyperbolic cosine of `x`.
 /// Is defined as `log(x + sqrt(x*x-1))`.
 /// `x` must be a number greater than or equal to 1.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acosh(x: f64) -> f64 {
     let u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs
index ad3455fdd..432fa03f1 100644
--- a/libm/src/math/acoshf.rs
+++ b/libm/src/math/acoshf.rs
@@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568;
 /// Calculates the inverse hyperbolic cosine of `x`.
 /// Is defined as `log(x + sqrt(x*x-1))`.
 /// `x` must be a number greater than or equal to 1.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acoshf(x: f32) -> f32 {
     let u = x.to_bits();
     let a = u & 0x7fffffff;
diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs
index f92b9a2af..b9a667620 100644
--- a/libm/src/math/arch/i586.rs
+++ b/libm/src/math/arch/i586.rs
@@ -1,37 +1,62 @@
 //! Architecture-specific support for x86-32 without SSE2
+//!
+//! We use an alternative implementation on x86, because the
+//! main implementation fails with the x87 FPU used by
+//! Debian i386, probably due to excess precision issues.
+//!
+//! See <https://github.com/rust-lang/compiler-builtins/pull/976> for discussion on why these
+//! functions are implemented in this way.
 
-use super::super::fabs;
-
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn ceil(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated < x {
-            return truncated + 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn ceil(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b10 (+∞).
+            "mov word ptr [{x} + 2], 0x0b7f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }
 
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn floor(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated > x {
-            return truncated - 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn floor(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b01 (-∞).
+            "mov word ptr [{x} + 2], 0x077f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }
diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs
index 12d0cd35f..9554a3eac 100644
--- a/libm/src/math/asin.rs
+++ b/libm/src/math/asin.rs
@@ -66,7 +66,7 @@ fn comp_r(z: f64) -> f64 {
 /// Computes the inverse sine (arc sine) of the argument `x`.
 /// Arguments to asin must be in the range -1 to 1.
 /// Returns values in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asin(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs
index ed6855567..2dfe2a6d4 100644
--- a/libm/src/math/asinf.rs
+++ b/libm/src/math/asinf.rs
@@ -35,7 +35,7 @@ fn r(z: f32) -> f32 {
 /// Computes the inverse sine (arc sine) of the argument `x`.
 /// Arguments to asin must be in the range -1 to 1.
 /// Returns values in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinf(mut x: f32) -> f32 {
     let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs
index 75d3c3ad4..d63bc0aa9 100644
--- a/libm/src/math/asinh.rs
+++ b/libm/src/math/asinh.rs
@@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42,  0xfefa3
 ///
 /// Calculates the inverse hyperbolic sine of `x`.
 /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinh(mut x: f64) -> f64 {
     let mut u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs
index 27ed9dd37..3ca2d4489 100644
--- a/libm/src/math/asinhf.rs
+++ b/libm/src/math/asinhf.rs
@@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568;
 ///
 /// Calculates the inverse hyperbolic sine of `x`.
 /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinhf(mut x: f32) -> f32 {
     let u = x.to_bits();
     let i = u & 0x7fffffff;
diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs
index 4ca5cc91a..0590ba87c 100644
--- a/libm/src/math/atan.rs
+++ b/libm/src/math/atan.rs
@@ -65,7 +65,7 @@ const AT: [f64; 11] = [
 ///
 /// Computes the inverse tangent (arc tangent) of the input value.
 /// Returns a value in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan(x: f64) -> f64 {
     let mut x = x;
     let mut ix = (x.to_bits() >> 32) as u32;
diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs
index c668731cf..51456e409 100644
--- a/libm/src/math/atan2.rs
+++ b/libm/src/math/atan2.rs
@@ -47,7 +47,7 @@ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
 /// Computes the inverse tangent (arc tangent) of `y/x`.
 /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0).
 /// Returns a value in radians, in the range of -pi to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan2(y: f64, x: f64) -> f64 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs
index 95b466fff..0f46c9f39 100644
--- a/libm/src/math/atan2f.rs
+++ b/libm/src/math/atan2f.rs
@@ -23,7 +23,7 @@ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */
 /// Computes the inverse tangent (arc tangent) of `y/x`.
 /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0).
 /// Returns a value in radians, in the range of -pi to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan2f(y: f32, x: f32) -> f32 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs
index da8daa41a..58568d9a8 100644
--- a/libm/src/math/atanf.rs
+++ b/libm/src/math/atanf.rs
@@ -41,7 +41,7 @@ const A_T: [f32; 5] = [
 ///
 /// Computes the inverse tangent (arc tangent) of the input value.
 /// Returns a value in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanf(mut x: f32) -> f32 {
     let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs
index 9dc826f56..883ff150f 100644
--- a/libm/src/math/atanh.rs
+++ b/libm/src/math/atanh.rs
@@ -5,7 +5,7 @@ use super::log1p;
 ///
 /// Calculates the inverse hyperbolic tangent of `x`.
 /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanh(x: f64) -> f64 {
     let u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs
index 80ccec1f6..e4e356d18 100644
--- a/libm/src/math/atanhf.rs
+++ b/libm/src/math/atanhf.rs
@@ -5,7 +5,7 @@ use super::log1pf;
 ///
 /// Calculates the inverse hyperbolic tangent of `x`.
 /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanhf(mut x: f32) -> f32 {
     let mut u = x.to_bits();
     let sign = (u >> 31) != 0;
diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs
index cf56f7a97..e905e15f1 100644
--- a/libm/src/math/cbrt.rs
+++ b/libm/src/math/cbrt.rs
@@ -8,7 +8,7 @@ use super::Float;
 use super::support::{FpResult, Round, cold_path};
 
 /// Compute the cube root of the argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cbrt(x: f64) -> f64 {
     cbrt_round(x, Round::Nearest).val
 }
diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs
index 9d70305c6..9d6958483 100644
--- a/libm/src/math/cbrtf.rs
+++ b/libm/src/math/cbrtf.rs
@@ -25,7 +25,7 @@ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
 /// Cube root (f32)
 ///
 /// Computes the cube root of the argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cbrtf(x: f32) -> f32 {
     let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
 
diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs
index 4e1035457..2cac49f29 100644
--- a/libm/src/math/ceil.rs
+++ b/libm/src/math/ceil.rs
@@ -2,7 +2,7 @@
 ///
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf16(x: f16) -> f16 {
     super::generic::ceil(x)
 }
@@ -10,7 +10,7 @@ pub fn ceilf16(x: f16) -> f16 {
 /// Ceil (f32)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf(x: f32) -> f32 {
     select_implementation! {
         name: ceilf,
@@ -24,7 +24,7 @@ pub fn ceilf(x: f32) -> f32 {
 /// Ceil (f64)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
@@ -40,7 +40,7 @@ pub fn ceil(x: f64) -> f64 {
 ///
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf128(x: f128) -> f128 {
     super::generic::ceil(x)
 }
diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs
index d2a86e7fd..591a87a94 100644
--- a/libm/src/math/copysign.rs
+++ b/libm/src/math/copysign.rs
@@ -3,7 +3,7 @@
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf16(x: f16, y: f16) -> f16 {
     super::generic::copysign(x, y)
 }
@@ -12,7 +12,7 @@ pub fn copysignf16(x: f16, y: f16) -> f16 {
 ///
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf(x: f32, y: f32) -> f32 {
     super::generic::copysign(x, y)
 }
@@ -21,7 +21,7 @@ pub fn copysignf(x: f32, y: f32) -> f32 {
 ///
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysign(x: f64, y: f64) -> f64 {
     super::generic::copysign(x, y)
 }
@@ -31,7 +31,7 @@ pub fn copysign(x: f64, y: f64) -> f64 {
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf128(x: f128, y: f128) -> f128 {
     super::generic::copysign(x, y)
 }
@@ -59,9 +59,17 @@ mod tests {
 
         // Not required but we expect it
         assert_biteq!(f(F::NAN, F::NAN), F::NAN);
-        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NAN, F::ONE), F::NAN);
+        assert_biteq!(f(F::NAN, F::NEG_ONE), F::NEG_NAN);
         assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::ONE), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NEG_ONE), F::NEG_NAN);
         assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::ONE, F::NAN), F::ONE);
+        assert_biteq!(f(F::ONE, F::NEG_NAN), F::NEG_ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NAN), F::ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NEG_NAN), F::NEG_ONE);
     }
 
     #[test]
diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs
index de99cd4c5..b2f786323 100644
--- a/libm/src/math/cos.rs
+++ b/libm/src/math/cos.rs
@@ -45,7 +45,7 @@ use super::{k_cos, k_sin, rem_pio2};
 /// The cosine of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cos(x: f64) -> f64 {
     let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff;
 
diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs
index 27c2fc3b9..bf5cb9196 100644
--- a/libm/src/math/cosf.rs
+++ b/libm/src/math/cosf.rs
@@ -27,7 +27,7 @@ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The cosine of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cosf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs
index d2e43fd6c..01081cfc7 100644
--- a/libm/src/math/cosh.rs
+++ b/libm/src/math/cosh.rs
@@ -5,7 +5,7 @@ use super::{exp, expm1, k_expo2};
 /// Computes the hyperbolic cosine of the argument x.
 /// Is defined as `(exp(x) + exp(-x))/2`
 /// Angles are specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cosh(mut x: f64) -> f64 {
     /* |x| */
     let mut ix = x.to_bits();
diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs
index 567a24410..dc039a311 100644
--- a/libm/src/math/coshf.rs
+++ b/libm/src/math/coshf.rs
@@ -5,7 +5,7 @@ use super::{expf, expm1f, k_expo2f};
 /// Computes the hyperbolic cosine of the argument x.
 /// Is defined as `(exp(x) + exp(-x))/2`
 /// Angles are specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn coshf(mut x: f32) -> f32 {
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs
index 5d82228a0..6c78440af 100644
--- a/libm/src/math/erf.rs
+++ b/libm/src/math/erf.rs
@@ -219,7 +219,7 @@ fn erfc2(ix: u32, mut x: f64) -> f64 {
 /// Calculates an approximation to the “error function”, which estimates
 /// the probability that an observation will fall within x standard
 /// deviations of the mean (assuming a normal distribution).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn erf(x: f64) -> f64 {
     let r: f64;
     let s: f64;
diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs
index fe15f0108..2a7680275 100644
--- a/libm/src/math/erff.rs
+++ b/libm/src/math/erff.rs
@@ -130,7 +130,7 @@ fn erfc2(mut ix: u32, mut x: f32) -> f32 {
 /// Calculates an approximation to the “error function”, which estimates
 /// the probability that an observation will fall within x standard
 /// deviations of the mean (assuming a normal distribution).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn erff(x: f32) -> f32 {
     let r: f32;
     let s: f32;
diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs
index 782042b62..78ce5dd13 100644
--- a/libm/src/math/exp.rs
+++ b/libm/src/math/exp.rs
@@ -81,7 +81,7 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 ///
 /// Calculate the exponential of `x`, that is, *e* raised to the power `x`
 /// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp(mut x: f64) -> f64 {
     let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
     let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149
diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs
index 7c33c92b6..1f49f5e96 100644
--- a/libm/src/math/exp10.rs
+++ b/libm/src/math/exp10.rs
@@ -7,7 +7,7 @@ const P10: &[f64] = &[
 ];
 
 /// Calculates 10 raised to the power of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp10(x: f64) -> f64 {
     let (mut y, n) = modf(x);
     let u: u64 = n.to_bits();
diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs
index 303045b33..22a264211 100644
--- a/libm/src/math/exp10f.rs
+++ b/libm/src/math/exp10f.rs
@@ -7,7 +7,7 @@ const P10: &[f32] = &[
 ];
 
 /// Calculates 10 raised to the power of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp10f(x: f32) -> f32 {
     let (mut y, n) = modff(x);
     let u = n.to_bits();
diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs
index 6e98d066c..6e4cbc29d 100644
--- a/libm/src/math/exp2.rs
+++ b/libm/src/math/exp2.rs
@@ -322,7 +322,7 @@ static TBL: [u64; TBLSIZE * 2] = [
 /// Exponential, base 2 (f64)
 ///
 /// Calculate `2^x`, that is, 2 raised to the power `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp2(mut x: f64) -> f64 {
     let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64;
     let p1 = f64::from_bits(0x3fe62e42fefa39ef);
diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs
index f452b6a20..733d2f1a8 100644
--- a/libm/src/math/exp2f.rs
+++ b/libm/src/math/exp2f.rs
@@ -73,7 +73,7 @@ static EXP2FT: [u64; TBLSIZE] = [
 /// Exponential, base 2 (f32)
 ///
 /// Calculate `2^x`, that is, 2 raised to the power `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp2f(mut x: f32) -> f32 {
     let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
     let p1 = f32::from_bits(0x3f317218);
diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs
index 8dc067ab0..dbbfdbba9 100644
--- a/libm/src/math/expf.rs
+++ b/libm/src/math/expf.rs
@@ -30,7 +30,7 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
 ///
 /// Calculate the exponential of `x`, that is, *e* raised to the power `x`
 /// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expf(mut x: f32) -> f32 {
     let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
     let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126  /*original 0x1p-149f    ??????????? */
diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs
index f25153f32..3714bf3af 100644
--- a/libm/src/math/expm1.rs
+++ b/libm/src/math/expm1.rs
@@ -30,7 +30,7 @@ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
 /// system of logarithms, approximately 2.71828).
 /// The result is accurate even for small values of `x`,
 /// where using `exp(x)-1` would lose many significant digits.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expm1(mut x: f64) -> f64 {
     let hi: f64;
     let lo: f64;
diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs
index 63dc86e37..f77515a4b 100644
--- a/libm/src/math/expm1f.rs
+++ b/libm/src/math/expm1f.rs
@@ -32,7 +32,7 @@ const Q2: f32 = 1.5807170421e-3; /*  0xcf3010.0p-33 */
 /// system of logarithms, approximately 2.71828).
 /// The result is accurate even for small values of `x`,
 /// where using `exp(x)-1` would lose many significant digits.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expm1f(mut x: f32) -> f32 {
     let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
 
diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs
index 82e9b360a..ce90858ec 100644
--- a/libm/src/math/expo2.rs
+++ b/libm/src/math/expo2.rs
@@ -1,7 +1,7 @@
 use super::{combine_words, exp};
 
 /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn expo2(x: f64) -> f64 {
     /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */
     const K: i32 = 2043;
diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs
index 0050a309f..7344e21a1 100644
--- a/libm/src/math/fabs.rs
+++ b/libm/src/math/fabs.rs
@@ -3,7 +3,7 @@
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf16(x: f16) -> f16 {
     super::generic::fabs(x)
 }
@@ -12,7 +12,7 @@ pub fn fabsf16(x: f16) -> f16 {
 ///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf(x: f32) -> f32 {
     select_implementation! {
         name: fabsf,
@@ -27,7 +27,7 @@ pub fn fabsf(x: f32) -> f32 {
 ///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabs(x: f64) -> f64 {
     select_implementation! {
         name: fabs,
@@ -43,7 +43,7 @@ pub fn fabs(x: f64) -> f64 {
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf128(x: f128) -> f128 {
     super::generic::fabs(x)
 }
diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs
index 082c5478b..dac409e86 100644
--- a/libm/src/math/fdim.rs
+++ b/libm/src/math/fdim.rs
@@ -7,7 +7,7 @@
 ///
 /// A range error may occur.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf16(x: f16, y: f16) -> f16 {
     super::generic::fdim(x, y)
 }
@@ -20,7 +20,7 @@ pub fn fdimf16(x: f16, y: f16) -> f16 {
 /// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf(x: f32, y: f32) -> f32 {
     super::generic::fdim(x, y)
 }
@@ -33,7 +33,7 @@ pub fn fdimf(x: f32, y: f32) -> f32 {
 /// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdim(x: f64, y: f64) -> f64 {
     super::generic::fdim(x, y)
 }
@@ -47,7 +47,7 @@ pub fn fdim(x: f64, y: f64) -> f64 {
 ///
 /// A range error may occur.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf128(x: f128, y: f128) -> f128 {
     super::generic::fdim(x, y)
 }
diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs
index 3c5eab101..7241c427f 100644
--- a/libm/src/math/floor.rs
+++ b/libm/src/math/floor.rs
@@ -2,7 +2,7 @@
 ///
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf16(x: f16) -> f16 {
     return super::generic::floor(x);
 }
@@ -10,7 +10,7 @@ pub fn floorf16(x: f16) -> f16 {
 /// Floor (f64)
 ///
 /// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
@@ -25,7 +25,7 @@ pub fn floor(x: f64) -> f64 {
 /// Floor (f32)
 ///
 /// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf(x: f32) -> f32 {
     select_implementation! {
         name: floorf,
@@ -40,7 +40,7 @@ pub fn floorf(x: f32) -> f32 {
 ///
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf128(x: f128) -> f128 {
     return super::generic::floor(x);
 }
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
index 5bf473cfe..70e6de768 100644
--- a/libm/src/math/fma.rs
+++ b/libm/src/math/fma.rs
@@ -7,7 +7,7 @@ use crate::support::Round;
 // Placeholder so we can have `fmaf16` in the `Float` trait.
 #[allow(unused)]
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
     unimplemented!()
 }
@@ -15,7 +15,7 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 /// Floating multiply add (f32)
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
     select_implementation! {
         name: fmaf,
@@ -32,7 +32,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 /// Fused multiply add (f64)
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_implementation! {
         name: fma,
@@ -50,7 +50,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
     generic::fma_round(x, y, z, Round::Nearest).val
 }
diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs
index 2947b783e..c4c1b0435 100644
--- a/libm/src/math/fmin_fmax.rs
+++ b/libm/src/math/fmin_fmax.rs
@@ -3,7 +3,7 @@
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf16(x: f16, y: f16) -> f16 {
     super::generic::fmin(x, y)
 }
@@ -12,7 +12,7 @@ pub fn fminf16(x: f16, y: f16) -> f16 {
 ///
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf(x: f32, y: f32) -> f32 {
     super::generic::fmin(x, y)
 }
@@ -21,7 +21,7 @@ pub fn fminf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmin(x: f64, y: f64) -> f64 {
     super::generic::fmin(x, y)
 }
@@ -31,7 +31,7 @@ pub fn fmin(x: f64, y: f64) -> f64 {
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf128(x: f128, y: f128) -> f128 {
     super::generic::fmin(x, y)
 }
@@ -41,7 +41,7 @@ pub fn fminf128(x: f128, y: f128) -> f128 {
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf16(x: f16, y: f16) -> f16 {
     super::generic::fmax(x, y)
 }
@@ -50,7 +50,7 @@ pub fn fmaxf16(x: f16, y: f16) -> f16 {
 ///
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf(x: f32, y: f32) -> f32 {
     super::generic::fmax(x, y)
 }
@@ -59,7 +59,7 @@ pub fn fmaxf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmax(x: f64, y: f64) -> f64 {
     super::generic::fmax(x, y)
 }
@@ -69,7 +69,7 @@ pub fn fmax(x: f64, y: f64) -> f64 {
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf128(x: f128, y: f128) -> f128 {
     super::generic::fmax(x, y)
 }
@@ -82,22 +82,77 @@ mod tests {
     fn fmin_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Either zero (or any NaN) is acceptable for these inputs, so bits are not compared
+        assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO);
+        assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -125,22 +180,77 @@ mod tests {
     fn fmax_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Either zero (or any NaN) is acceptable for these inputs, so bits are not compared
+        assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO);
+        assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs
index b7999e273..a3c9c9c39 100644
--- a/libm/src/math/fminimum_fmaximum.rs
+++ b/libm/src/math/fminimum_fmaximum.rs
@@ -2,7 +2,7 @@
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf16(x: f16, y: f16) -> f16 {
     super::generic::fminimum(x, y)
 }
@@ -10,7 +10,7 @@ pub fn fminimumf16(x: f16, y: f16) -> f16 {
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum(x: f64, y: f64) -> f64 {
     super::generic::fminimum(x, y)
 }
@@ -18,7 +18,7 @@ pub fn fminimum(x: f64, y: f64) -> f64 {
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf(x: f32, y: f32) -> f32 {
     super::generic::fminimum(x, y)
 }
@@ -27,7 +27,7 @@ pub fn fminimumf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf128(x: f128, y: f128) -> f128 {
     super::generic::fminimum(x, y)
 }
@@ -36,7 +36,7 @@ pub fn fminimumf128(x: f128, y: f128) -> f128 {
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf16(x: f16, y: f16) -> f16 {
     super::generic::fmaximum(x, y)
 }
@@ -44,7 +44,7 @@ pub fn fmaximumf16(x: f16, y: f16) -> f16 {
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf(x: f32, y: f32) -> f32 {
     super::generic::fmaximum(x, y)
 }
@@ -52,7 +52,7 @@ pub fn fmaximumf(x: f32, y: f32) -> f32 {
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum(x: f64, y: f64) -> f64 {
     super::generic::fmaximum(x, y)
 }
@@ -61,7 +61,7 @@ pub fn fmaximum(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum(x, y)
 }
@@ -74,24 +74,77 @@ mod tests {
     fn fminimum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NAN),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::NAN),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NAN),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::NAN),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NAN),
             (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NEG_ZERO, F::NAN),
+            (F::NAN, F::ONE, F::NAN),
+            (F::NAN, F::NEG_ONE, F::NAN),
+            (F::NAN, F::INFINITY, F::NAN),
+            (F::NAN, F::NEG_INFINITY, F::NAN),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
         }
+
+        // The sign of a NaN result does not matter, so only NaN-ness is checked (not bits)
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -119,24 +172,77 @@ mod tests {
     fn fmaximum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NAN),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::NAN),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NAN),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::NAN),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NAN),
             (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NEG_ZERO, F::NAN),
+            (F::NAN, F::ONE, F::NAN),
+            (F::NAN, F::NEG_ONE, F::NAN),
+            (F::NAN, F::INFINITY, F::NAN),
+            (F::NAN, F::NEG_INFINITY, F::NAN),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs
index 180d21f72..612cefe75 100644
--- a/libm/src/math/fminimum_fmaximum_num.rs
+++ b/libm/src/math/fminimum_fmaximum_num.rs
@@ -2,7 +2,7 @@
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
     super::generic::fminimum_num(x, y)
 }
@@ -10,7 +10,7 @@ pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
 /// Return the lesser of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf(x: f32, y: f32) -> f32 {
     super::generic::fminimum_num(x, y)
 }
@@ -18,7 +18,7 @@ pub fn fminimum_numf(x: f32, y: f32) -> f32 {
 /// Return the lesser of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_num(x: f64, y: f64) -> f64 {
     super::generic::fminimum_num(x, y)
 }
@@ -27,7 +27,7 @@ pub fn fminimum_num(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
     super::generic::fminimum_num(x, y)
 }
@@ -36,7 +36,7 @@ pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
     super::generic::fmaximum_num(x, y)
 }
@@ -44,7 +44,7 @@ pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
 /// Return the greater of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
     super::generic::fmaximum_num(x, y)
 }
@@ -52,7 +52,7 @@ pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
 /// Return the greater of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_num(x: f64, y: f64) -> f64 {
     super::generic::fmaximum_num(x, y)
 }
@@ -61,7 +61,7 @@ pub fn fmaximum_num(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum_num(x, y)
 }
@@ -74,24 +74,77 @@ mod tests {
     fn fminimum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
-        for (x, y, res) in cases {
-            let val = f(x, y);
-            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
+        for (x, y, expected) in cases {
+            let actual = f(x, y);
+            assert_biteq!(actual, expected, "fminimum_num({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -119,24 +172,77 @@ mod tests {
     fn fmaximum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
-        for (x, y, res) in cases {
-            let val = f(x, y);
-            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
+        for (x, y, expected) in cases {
+            let actual = f(x, y);
+            assert_biteq!(actual, expected, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs
index c4752b925..6ae1be560 100644
--- a/libm/src/math/fmod.rs
+++ b/libm/src/math/fmod.rs
@@ -1,25 +1,25 @@
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf16(x: f16, y: f16) -> f16 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf(x: f32, y: f32) -> f32 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf128(x: f128, y: f128) -> f128 {
     super::generic::fmod(x, y)
 }
diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs
index de7a64fda..932111eeb 100644
--- a/libm/src/math/frexp.rs
+++ b/libm/src/math/frexp.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn frexp(x: f64) -> (f64, i32) {
     let mut y = x.to_bits();
     let ee = ((y >> 52) & 0x7ff) as i32;
diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs
index 0ec91c2d3..904bf14f7 100644
--- a/libm/src/math/frexpf.rs
+++ b/libm/src/math/frexpf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn frexpf(x: f32) -> (f32, i32) {
     let mut y = x.to_bits();
     let ee: i32 = ((y >> 23) & 0xff) as i32;
diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs
index 54207e4b3..b05804704 100644
--- a/libm/src/math/generic/fmax.rs
+++ b/libm/src/math/generic/fmax.rs
@@ -19,6 +19,5 @@ use crate::support::Float;
 #[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() || x < y { y } else { x };
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
index 4b6295bc0..55a031e18 100644
--- a/libm/src/math/generic/fmaximum.rs
+++ b/libm/src/math/generic/fmaximum.rs
@@ -4,8 +4,8 @@
 //! Per the spec, returns the canonicalized result of:
 //! - `x` if `x > y`
 //! - `y` if `y > x`
+//! - +0.0 if x and y are zero with opposite signs
 //! - qNaN if either operation is NaN
-//! - Logic following +0.0 > -0.0
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -17,12 +17,11 @@ pub fn fmaximum<F: Float>(x: F, y: F) -> F {
         x
     } else if y.is_nan() {
         y
-    } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) {
+    } else if x > y || (y.biteq(F::NEG_ZERO) && x.is_sign_positive()) {
         x
     } else {
         y
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
index 2e97ff6d3..2dc60b2d2 100644
--- a/libm/src/math/generic/fmaximum_num.rs
+++ b/libm/src/math/generic/fmaximum_num.rs
@@ -4,10 +4,10 @@
 //! Per the spec, returns:
 //! - `x` if `x > y`
 //! - `y` if `y > x`
-//! - Non-NaN if one operand is NaN
-//! - Logic following +0.0 > -0.0
+//! - +0.0 if x and y are zero with opposite signs
 //! - Either `x` or `y` if `x == y` and the signs are the same
-//! - qNaN if either operand is a NaN
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaNs
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -15,13 +15,15 @@ use crate::support::Float;
 
 #[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
-    let res =
-        if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
-            y
-        } else {
-            x
-        };
+    let res = if x > y || y.is_nan() {
+        x
+    } else if y > x || x.is_nan() {
+        y
+    } else if x.is_sign_positive() {
+        x
+    } else {
+        y
+    };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs
index 0f86364d2..e2245bf9e 100644
--- a/libm/src/math/generic/fmin.rs
+++ b/libm/src/math/generic/fmin.rs
@@ -19,6 +19,5 @@ use crate::support::Float;
 #[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
     let res = if y.is_nan() || x < y { x } else { y };
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
index 9dc0b64be..aa68b1291 100644
--- a/libm/src/math/generic/fminimum.rs
+++ b/libm/src/math/generic/fminimum.rs
@@ -4,8 +4,8 @@
 //! Per the spec, returns the canonicalized result of:
 //! - `x` if `x < y`
 //! - `y` if `y < x`
+//! - -0.0 if x and y are zero with opposite signs
 //! - qNaN if either operation is NaN
-//! - Logic following +0.0 > -0.0
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -17,12 +17,11 @@ pub fn fminimum<F: Float>(x: F, y: F) -> F {
         x
     } else if y.is_nan() {
         y
-    } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
+    } else if x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
         x
     } else {
         y
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
index 40db8b189..265bd4605 100644
--- a/libm/src/math/generic/fminimum_num.rs
+++ b/libm/src/math/generic/fminimum_num.rs
@@ -4,10 +4,10 @@
 //! Per the spec, returns:
 //! - `x` if `x < y`
 //! - `y` if `y < x`
-//! - Non-NaN if one operand is NaN
-//! - Logic following +0.0 > -0.0
+//! - -0.0 if x and y are zero with opposite signs
 //! - Either `x` or `y` if `x == y` and the signs are the same
-//! - qNaN if either operand is a NaN
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaNs
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -15,13 +15,15 @@ use crate::support::Float;
 
 #[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
-    let res =
-        if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
-            x
-        } else {
-            y
-        };
+    let res = if x > y || x.is_nan() {
+        y
+    } else if y > x || y.is_nan() {
+        x
+    } else if x.is_sign_positive() {
+        y
+    } else {
+        x
+    };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs
index da458ea1d..b92ee18ca 100644
--- a/libm/src/math/hypot.rs
+++ b/libm/src/math/hypot.rs
@@ -17,7 +17,7 @@ fn sq(x: f64) -> (f64, f64) {
     (hi, lo)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn hypot(mut x: f64, mut y: f64) -> f64 {
     let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
     let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs
index 576eebb33..e7635ffc9 100644
--- a/libm/src/math/hypotf.rs
+++ b/libm/src/math/hypotf.rs
@@ -2,7 +2,7 @@ use core::f32;
 
 use super::sqrtf;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn hypotf(mut x: f32, mut y: f32) -> f32 {
     let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90
     let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90
diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs
index 5b41f7b1d..ef774f6ad 100644
--- a/libm/src/math/ilogb.rs
+++ b/libm/src/math/ilogb.rs
@@ -1,7 +1,7 @@
 const FP_ILOGBNAN: i32 = -1 - 0x7fffffff;
 const FP_ILOGB0: i32 = FP_ILOGBNAN;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ilogb(x: f64) -> i32 {
     let mut i: u64 = x.to_bits();
     let e = ((i >> 52) & 0x7ff) as i32;
diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs
index 3585d6d36..5b0cb46ec 100644
--- a/libm/src/math/ilogbf.rs
+++ b/libm/src/math/ilogbf.rs
@@ -1,7 +1,7 @@
 const FP_ILOGBNAN: i32 = -1 - 0x7fffffff;
 const FP_ILOGB0: i32 = FP_ILOGBNAN;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ilogbf(x: f32) -> i32 {
     let mut i = x.to_bits();
     let e = ((i >> 23) & 0xff) as i32;
diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs
index 99d656f0d..7b0800477 100644
--- a/libm/src/math/j0.rs
+++ b/libm/src/math/j0.rs
@@ -110,7 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */
 const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j0(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
@@ -165,7 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */
 const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y0(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs
index 25e5b325c..1c6a7c344 100644
--- a/libm/src/math/j0f.rs
+++ b/libm/src/math/j0f.rs
@@ -63,7 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */
 const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j0f(mut x: f32) -> f32 {
     let z: f32;
     let r: f32;
@@ -110,7 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */
 const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y0f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs
index 9b604d9e4..7d304ba10 100644
--- a/libm/src/math/j1.rs
+++ b/libm/src/math/j1.rs
@@ -114,7 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */
 const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j1(x: f64) -> f64 {
     let mut z: f64;
     let r: f64;
@@ -161,7 +161,7 @@ const V0: [f64; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y1(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs
index a47472401..cd829c1aa 100644
--- a/libm/src/math/j1f.rs
+++ b/libm/src/math/j1f.rs
@@ -64,7 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */
 const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j1f(x: f32) -> f32 {
     let mut z: f32;
     let r: f32;
@@ -110,7 +110,7 @@ const V0: [f32; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y1f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
@@ -361,8 +361,6 @@ fn qonef(x: f32) -> f32 {
     return (0.375 + r / s) / x;
 }
 
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
     use super::{j1f, y1f};
@@ -371,6 +369,7 @@ mod tests {
         // 0x401F3E49
         assert_eq!(j1f(2.4881766_f32), 0.49999475_f32);
     }
+
     #[test]
     fn test_y1f_2002() {
         //allow slightly different result on x87
diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs
index 31f8d9c53..b87aeaf1c 100644
--- a/libm/src/math/jn.rs
+++ b/libm/src/math/jn.rs
@@ -39,7 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0,
 const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn jn(n: i32, mut x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
@@ -249,7 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn yn(n: i32, x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs
index 52cf7d8a8..34fdc5112 100644
--- a/libm/src/math/jnf.rs
+++ b/libm/src/math/jnf.rs
@@ -16,7 +16,7 @@
 use super::{fabsf, j0f, j1f, logf, y0f, y1f};
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn jnf(n: i32, mut x: f32) -> f32 {
     let mut ix: u32;
     let mut nm1: i32;
@@ -192,7 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ynf(n: i32, x: f32) -> f32 {
     let mut ix: u32;
     let mut ib: u32;
diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs
index 49b2fc64d..1a2ebabe3 100644
--- a/libm/src/math/k_cos.rs
+++ b/libm/src/math/k_cos.rs
@@ -51,7 +51,7 @@ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 //         expression for cos().  Retention happens in all cases tested
 //         under FreeBSD, so don't pessimize things by forcibly clipping
 //         any extra precision in w.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_cos(x: f64, y: f64) -> f64 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs
index e99f2348c..68f568c24 100644
--- a/libm/src/math/k_cosf.rs
+++ b/libm/src/math/k_cosf.rs
@@ -20,7 +20,7 @@ const C1: f64 = 0.0416666233237390631894; /*  0x155553e1053a42.0p-57 */
 const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */
 const C3: f64 = 0.0000243904487962774090654; /*  0x199342e0ee5069.0p-68 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_cosf(x: f64) -> f32 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs
index 7345075f3..7b63952d2 100644
--- a/libm/src/math/k_expo2.rs
+++ b/libm/src/math/k_expo2.rs
@@ -4,7 +4,7 @@ use super::exp;
 const K: i32 = 2043;
 
 /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_expo2(x: f64) -> f64 {
     let k_ln2 = f64::from_bits(0x40962066151add8b);
     /* note that k is odd and scale*scale overflows */
diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs
index fbd7b27d5..02213cec4 100644
--- a/libm/src/math/k_expo2f.rs
+++ b/libm/src/math/k_expo2f.rs
@@ -4,7 +4,7 @@ use super::expf;
 const K: i32 = 235;
 
 /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_expo2f(x: f32) -> f32 {
     let k_ln2 = f32::from_bits(0x4322e3bc);
     /* note that k is odd and scale*scale overflows */
diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs
index 9dd96c944..2f8542945 100644
--- a/libm/src/math/k_sin.rs
+++ b/libm/src/math/k_sin.rs
@@ -43,7 +43,7 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 //              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
 //         then                   3    2
 //              sin(x) = x + (S1*x + (x *(r-y/2)+y))
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs
index 88d10caba..297d88bbb 100644
--- a/libm/src/math/k_sinf.rs
+++ b/libm/src/math/k_sinf.rs
@@ -20,7 +20,7 @@ const S2: f64 = 0.0083333293858894631756; /*  0x111110896efbb2.0p-59 */
 const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */
 const S4: f64 = 0.0000027183114939898219064; /*  0x16cd878c3b46a7.0p-71 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_sinf(x: f64) -> f32 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs
index d177010bb..ac48d661f 100644
--- a/libm/src/math/k_tan.rs
+++ b/libm/src/math/k_tan.rs
@@ -58,7 +58,7 @@ static T: [f64; 13] = [
 const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */
 const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 {
     let hx = (f64::to_bits(x) >> 32) as u32;
     let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs
index af8db539d..79382f57b 100644
--- a/libm/src/math/k_tanf.rs
+++ b/libm/src/math/k_tanf.rs
@@ -19,7 +19,7 @@ const T: [f64; 6] = [
     0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */
 ];
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 {
     let z = x * x;
     /*
diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs
index 24899ba30..b32b8d524 100644
--- a/libm/src/math/ldexp.rs
+++ b/libm/src/math/ldexp.rs
@@ -1,21 +1,21 @@
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf16(x: f16, n: i32) -> f16 {
     super::scalbnf16(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf(x: f32, n: i32) -> f32 {
     super::scalbnf(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexp(x: f64, n: i32) -> f64 {
     super::scalbn(x, n)
 }
 
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf128(x: f128, n: i32) -> f128 {
     super::scalbnf128(x, n)
 }
diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs
index 8312dc186..da7ce5c98 100644
--- a/libm/src/math/lgamma.rs
+++ b/libm/src/math/lgamma.rs
@@ -2,7 +2,7 @@ use super::lgamma_r;
 
 /// The natural logarithm of the
 /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgamma(x: f64) -> f64 {
     lgamma_r(x).0
 }
diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs
index 6becaad2c..38eb270f6 100644
--- a/libm/src/math/lgamma_r.rs
+++ b/libm/src/math/lgamma_r.rs
@@ -165,7 +165,7 @@ fn sin_pi(mut x: f64) -> f64 {
     }
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgamma_r(mut x: f64) -> (f64, i32) {
     let u: u64 = x.to_bits();
     let mut t: f64;
diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs
index d37512397..920acfed2 100644
--- a/libm/src/math/lgammaf.rs
+++ b/libm/src/math/lgammaf.rs
@@ -2,7 +2,7 @@ use super::lgammaf_r;
 
 /// The natural logarithm of the
 /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgammaf(x: f32) -> f32 {
     lgammaf_r(x).0
 }
diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs
index 10cecee54..a0b6a678a 100644
--- a/libm/src/math/lgammaf_r.rs
+++ b/libm/src/math/lgammaf_r.rs
@@ -100,7 +100,7 @@ fn sin_pi(mut x: f32) -> f32 {
     }
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgammaf_r(mut x: f32) -> (f32, i32) {
     let u = x.to_bits();
     let mut t: f32;
diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs
index f2dc47ec5..9499c56d8 100644
--- a/libm/src/math/log.rs
+++ b/libm/src/math/log.rs
@@ -71,7 +71,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The natural logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs
index 8c9d68c49..29f25d944 100644
--- a/libm/src/math/log10.rs
+++ b/libm/src/math/log10.rs
@@ -32,7 +32,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The base 10 logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log10(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs
index 18bf8fcc8..f89584bf9 100644
--- a/libm/src/math/log10f.rs
+++ b/libm/src/math/log10f.rs
@@ -26,7 +26,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The base 10 logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log10f(mut x: f32) -> f32 {
     let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs
index 65142c0d6..c991cce60 100644
--- a/libm/src/math/log1p.rs
+++ b/libm/src/math/log1p.rs
@@ -66,7 +66,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The natural logarithm of 1+`x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log1p(x: f64) -> f64 {
     let mut ui: u64 = x.to_bits();
     let hfsq: f64;
diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs
index 23978e61c..89a92fac9 100644
--- a/libm/src/math/log1pf.rs
+++ b/libm/src/math/log1pf.rs
@@ -21,7 +21,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The natural logarithm of 1+`x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log1pf(x: f32) -> f32 {
     let mut ui: u32 = x.to_bits();
     let hfsq: f32;
diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs
index 701f63c25..9b750c9a2 100644
--- a/libm/src/math/log2.rs
+++ b/libm/src/math/log2.rs
@@ -30,7 +30,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The base 2 logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log2(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs
index 5ba2427d1..0e5177d7a 100644
--- a/libm/src/math/log2f.rs
+++ b/libm/src/math/log2f.rs
@@ -24,7 +24,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The base 2 logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log2f(mut x: f32) -> f32 {
     let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs
index 68d194302..cd7a7b0ba 100644
--- a/libm/src/math/logf.rs
+++ b/libm/src/math/logf.rs
@@ -22,7 +22,7 @@ const LG3: f32 = 0.28498786688; /*  0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /*  0xf89e26.0p-26 */
 
 /// The natural logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn logf(mut x: f32) -> f32 {
     let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
index ce9b8fc58..8eecfe566 100644
--- a/libm/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::approx_constant)] // many false positives
+
 macro_rules! force_eval {
     ($e:expr) => {
         unsafe { ::core::ptr::read_volatile(&$e) }
diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs
index 6541862cd..a92a83dc5 100644
--- a/libm/src/math/modf.rs
+++ b/libm/src/math/modf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn modf(x: f64) -> (f64, f64) {
     let rv2: f64;
     let mut u = x.to_bits();
diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs
index 90c6bca7d..691f351ca 100644
--- a/libm/src/math/modff.rs
+++ b/libm/src/math/modff.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn modff(x: f32) -> (f32, f32) {
     let rv2: f32;
     let mut u: u32 = x.to_bits();
diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs
index c991ff6f2..f4408468c 100644
--- a/libm/src/math/nextafter.rs
+++ b/libm/src/math/nextafter.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn nextafter(x: f64, y: f64) -> f64 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs
index 8ba383356..c15eb9de2 100644
--- a/libm/src/math/nextafterf.rs
+++ b/libm/src/math/nextafterf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn nextafterf(x: f32, y: f32) -> f32 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs
index 94ae31cf0..914d68cfc 100644
--- a/libm/src/math/pow.rs
+++ b/libm/src/math/pow.rs
@@ -90,7 +90,7 @@ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/l
 const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/
 
 /// Returns `x` to the power of `y` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn pow(x: f64, y: f64) -> f64 {
     let t1: f64;
     let t2: f64;
diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs
index 11c7a7cbd..17772ae87 100644
--- a/libm/src/math/powf.rs
+++ b/libm/src/math/powf.rs
@@ -46,7 +46,7 @@ const IVLN2_H: f32 = 1.4426879883e+00;
 const IVLN2_L: f32 = 7.0526075433e-06;
 
 /// Returns `x` to the power of `y` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn powf(x: f32, y: f32) -> f32 {
     let mut z: f32;
     let mut ax: f32;
diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
index d677fd9dc..61b103027 100644
--- a/libm/src/math/rem_pio2.rs
+++ b/libm/src/math/rem_pio2.rs
@@ -41,7 +41,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 // use rem_pio2_large() for large x
 //
 // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
     let x1p24 = f64::from_bits(0x4170000000000000);
 
@@ -195,7 +195,7 @@ mod tests {
 
     #[test]
     // FIXME(correctness): inaccurate results on i586
-    #[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)]
+    #[cfg_attr(x86_no_sse, ignore)]
     fn test_near_pi() {
         let arg = 3.141592025756836;
         let arg = force_eval!(arg);
diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs
index 6d679bbe9..f1fdf3673 100644
--- a/libm/src/math/rem_pio2_large.rs
+++ b/libm/src/math/rem_pio2_large.rs
@@ -11,7 +11,7 @@
  * ====================================================
  */
 
-use super::{floor, scalbn};
+use super::scalbn;
 
 // initial value for jk
 const INIT_JK: [usize; 4] = [3, 4, 4, 6];
@@ -221,8 +221,16 @@ const PIO2: [f64; 8] = [
 /// skip the part of the product that are known to be a huge integer (
 /// more accurately, = 0 mod 8 ). Thus the number of operations are
 /// independent of the exponent of the input.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 {
+    // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail
+    // on the callers of this function. As a workaround, avoid inlining `floor` here
+    // when implemented with assembly.
+    #[cfg_attr(x86_no_sse, inline(never))]
+    extern "C" fn floor(x: f64) -> f64 {
+        super::floor(x)
+    }
+
     let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
     let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
 
diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs
index 3c658fe3d..0472a1035 100644
--- a/libm/src/math/rem_pio2f.rs
+++ b/libm/src/math/rem_pio2f.rs
@@ -31,7 +31,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
 ///
 /// use double precision for everything except passing x
 /// use __rem_pio2_large() for large x
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) {
     let x64 = x as f64;
 
diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs
index 9e966c9ed..54152df32 100644
--- a/libm/src/math/remainder.rs
+++ b/libm/src/math/remainder.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remainder(x: f64, y: f64) -> f64 {
     let (result, _) = super::remquo(x, y);
     result
diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs
index b1407cf2a..21f629214 100644
--- a/libm/src/math/remainderf.rs
+++ b/libm/src/math/remainderf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remainderf(x: f32, y: f32) -> f32 {
     let (result, _) = super::remquof(x, y);
     result
diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs
index 4c11e8487..f13b09237 100644
--- a/libm/src/math/remquo.rs
+++ b/libm/src/math/remquo.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) {
     let ux: u64 = x.to_bits();
     let mut uy: u64 = y.to_bits();
diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs
index b0e85ca66..cc7863a09 100644
--- a/libm/src/math/remquof.rs
+++ b/libm/src/math/remquof.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) {
     let ux: u32 = x.to_bits();
     let mut uy: u32 = y.to_bits();
diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs
index e1c32c943..011a7ae3d 100644
--- a/libm/src/math/rint.rs
+++ b/libm/src/math/rint.rs
@@ -2,7 +2,7 @@ use super::support::Round;
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf16(x: f16) -> f16 {
     select_implementation! {
         name: rintf16,
@@ -14,7 +14,7 @@ pub fn rintf16(x: f16) -> f16 {
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf(x: f32) -> f32 {
     select_implementation! {
         name: rintf,
@@ -29,7 +29,7 @@ pub fn rintf(x: f32) -> f32 {
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rint(x: f64) -> f64 {
     select_implementation! {
         name: rint,
@@ -45,7 +45,7 @@ pub fn rint(x: f64) -> f64 {
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf128(x: f128) -> f128 {
     super::generic::rint_round(x, Round::Nearest).val
 }
diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs
index 6cd091cd7..256197e6c 100644
--- a/libm/src/math/round.rs
+++ b/libm/src/math/round.rs
@@ -1,25 +1,25 @@
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf16(x: f16) -> f16 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf(x: f32) -> f32 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf128(x: f128) -> f128 {
     super::generic::round(x)
 }
diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs
index 6e621d762..f0d67d410 100644
--- a/libm/src/math/roundeven.rs
+++ b/libm/src/math/roundeven.rs
@@ -3,21 +3,21 @@ use super::support::{Float, Round};
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf16(x: f16) -> f16 {
     roundeven_impl(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf(x: f32) -> f32 {
     roundeven_impl(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundeven(x: f64) -> f64 {
     roundeven_impl(x)
 }
@@ -25,7 +25,7 @@ pub fn roundeven(x: f64) -> f64 {
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf128(x: f128) -> f128 {
     roundeven_impl(x)
 }
diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs
index ed73c3f94..f1a67cb7f 100644
--- a/libm/src/math/scalbn.rs
+++ b/libm/src/math/scalbn.rs
@@ -1,21 +1,21 @@
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf16(x: f16, n: i32) -> f16 {
     super::generic::scalbn(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf(x: f32, n: i32) -> f32 {
     super::generic::scalbn(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbn(x: f64, n: i32) -> f64 {
     super::generic::scalbn(x, n)
 }
 
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf128(x: f128, n: i32) -> f128 {
     super::generic::scalbn(x, n)
 }
diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs
index 229fa4bef..5378a7bc3 100644
--- a/libm/src/math/sin.rs
+++ b/libm/src/math/sin.rs
@@ -44,7 +44,7 @@ use super::{k_cos, k_sin, rem_pio2};
 /// The sine of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sin(x: f64) -> f64 {
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs
index ebf482f2d..a364f7375 100644
--- a/libm/src/math/sincos.rs
+++ b/libm/src/math/sincos.rs
@@ -15,7 +15,7 @@ use super::{get_high_word, k_cos, k_sin, rem_pio2};
 /// Both the sine and cosine of `x` (f64).
 ///
 /// `x` is specified in radians and the return value is (sin(x), cos(x)).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sincos(x: f64) -> (f64, f64) {
     let s: f64;
     let c: f64;
diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs
index f33607676..c4beb5267 100644
--- a/libm/src/math/sincosf.rs
+++ b/libm/src/math/sincosf.rs
@@ -26,7 +26,7 @@ const S4PIO2: f64 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */
 /// Both the sine and cosine of `x` (f32).
 ///
 /// `x` is specified in radians and the return value is (sin(x), cos(x)).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sincosf(x: f32) -> (f32, f32) {
     let s: f32;
     let c: f32;
diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs
index 709b63fcf..b4edf6769 100644
--- a/libm/src/math/sinf.rs
+++ b/libm/src/math/sinf.rs
@@ -27,7 +27,7 @@ const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The sine of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs
index 791841982..900dd6ca4 100644
--- a/libm/src/math/sinh.rs
+++ b/libm/src/math/sinh.rs
@@ -6,7 +6,7 @@ use super::{expm1, expo2};
 //
 
 /// The hyperbolic sine of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinh(x: f64) -> f64 {
     // union {double f; uint64_t i;} u = {.f = x};
     // uint32_t w;
diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs
index 44d2e3560..501acea30 100644
--- a/libm/src/math/sinhf.rs
+++ b/libm/src/math/sinhf.rs
@@ -1,7 +1,7 @@
 use super::{expm1f, k_expo2f};
 
 /// The hyperbolic sine of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinhf(x: f32) -> f32 {
     let mut h = 0.5f32;
     let mut ix = x.to_bits();
diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs
index 76bc240cf..7ba1bc9b3 100644
--- a/libm/src/math/sqrt.rs
+++ b/libm/src/math/sqrt.rs
@@ -1,6 +1,6 @@
 /// The square root of `x` (f16).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf16(x: f16) -> f16 {
     select_implementation! {
         name: sqrtf16,
@@ -12,7 +12,7 @@ pub fn sqrtf16(x: f16) -> f16 {
 }
 
 /// The square root of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
     select_implementation! {
         name: sqrtf,
@@ -28,7 +28,7 @@ pub fn sqrtf(x: f32) -> f32 {
 }
 
 /// The square root of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
     select_implementation! {
         name: sqrt,
@@ -45,7 +45,7 @@ pub fn sqrt(x: f64) -> f64 {
 
 /// The square root of `x` (f128).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf128(x: f128) -> f128 {
     return super::generic::sqrt(x);
 }
diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs
index 8a52d86cc..b7f128542 100644
--- a/libm/src/math/support/big.rs
+++ b/libm/src/math/support/big.rs
@@ -11,10 +11,10 @@ const U128_LO_MASK: u128 = u64::MAX as u128;
 
 /// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct u256 {
-    pub lo: u128,
     pub hi: u128,
+    pub lo: u128, // kept after `hi`: derived `PartialOrd`/`Ord` compare fields in declaration order
 }
 
 impl u256 {
@@ -28,17 +28,17 @@ impl u256 {
     pub fn signed(self) -> i256 {
         i256 {
             lo: self.lo,
-            hi: self.hi,
+            hi: self.hi as i128,
         }
     }
 }
 
 /// A 256-bit signed integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct i256 {
+    pub hi: i128, // signed high limb, declared first: derived `PartialOrd`/`Ord` compare fields in declaration order
     pub lo: u128,
-    pub hi: u128,
 }
 
 impl i256 {
@@ -47,7 +47,7 @@ impl i256 {
     pub fn unsigned(self) -> u256 {
         u256 {
             lo: self.lo,
-            hi: self.hi,
+            hi: self.hi as u128,
         }
     }
 }
@@ -73,17 +73,17 @@ impl MinInt for i256 {
 
     type Unsigned = u256;
 
-    const SIGNED: bool = false;
+    const SIGNED: bool = true;
     const BITS: u32 = 256;
     const ZERO: Self = Self { lo: 0, hi: 0 };
     const ONE: Self = Self { lo: 1, hi: 0 };
     const MIN: Self = Self {
-        lo: 0,
-        hi: 1 << 127,
+        lo: u128::MIN,
+        hi: i128::MIN,
     };
     const MAX: Self = Self {
         lo: u128::MAX,
-        hi: u128::MAX >> 1,
+        hi: i128::MAX,
     };
 }
 
@@ -109,60 +109,86 @@ macro_rules! impl_common {
             }
         }
 
-        impl ops::Shl<u32> for $ty {
+        impl ops::Add<Self> for $ty {
             type Output = Self;
 
-            fn shl(self, _rhs: u32) -> Self::Output {
-                unimplemented!("only used to meet trait bounds")
+            fn add(self, rhs: Self) -> Self::Output {
+                let (lo, carry) = self.lo.overflowing_add(rhs.lo); // low limbs first; `carry` feeds the high-limb add
+                let (hi, of) = Int::carrying_add(self.hi, rhs.hi, carry);
+                debug_assert!(!of, "attempt to add with overflow"); // `of` is only checked in debug builds
+                Self { lo, hi }
             }
         }
-    };
-}
 
-impl_common!(i256);
-impl_common!(u256);
+        impl ops::Sub<Self> for $ty {
+            type Output = Self;
 
-impl ops::Add<Self> for u256 {
-    type Output = Self;
+            fn sub(self, rhs: Self) -> Self::Output {
+                let (lo, borrow) = self.lo.overflowing_sub(rhs.lo); // low limbs first; `borrow` feeds the high-limb subtract
+                let (hi, of) = Int::borrowing_sub(self.hi, rhs.hi, borrow);
+                debug_assert!(!of, "attempt to subtract with overflow"); // `of` is only checked in debug builds
+                Self { lo, hi }
+            }
+        }
 
-    fn add(self, rhs: Self) -> Self::Output {
-        let (lo, carry) = self.lo.overflowing_add(rhs.lo);
-        let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi);
+        impl ops::Shl<u32> for $ty {
+            type Output = Self;
 
-        Self { lo, hi }
-    }
-}
+            fn shl(mut self, rhs: u32) -> Self::Output {
+                debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
 
-impl ops::Shr<u32> for u256 {
-    type Output = Self;
+                let half_bits = Self::BITS / 2;
+                let low_mask = half_bits - 1;
+                let s = rhs & low_mask; // shift distance within one limb
 
-    fn shr(mut self, rhs: u32) -> Self::Output {
-        debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
-        if rhs >= Self::BITS {
-            return Self::ZERO;
-        }
+                let lo = self.lo;
+                let hi = self.hi;
 
-        if rhs == 0 {
-            return self;
-        }
+                self.lo = lo << s;
 
-        if rhs < 128 {
-            self.lo >>= rhs;
-            self.lo |= self.hi << (128 - rhs);
-        } else {
-            self.lo = self.hi >> (rhs - 128);
+                if rhs & half_bits == 0 { // in-range `rhs` is below `half_bits`: bits carry from `lo` into `hi`
+                    self.hi = (lo >> (low_mask ^ s) >> 1) as _; // `lo >> (half_bits - s)` in two steps, so `s == 0` never shifts by a full limb
+                    self.hi |= hi << s;
+                } else { // `rhs` is at least `half_bits`: the shifted low limb becomes the high limb
+                    self.hi = self.lo as _; // `self.lo` already holds `lo << s`
+                    self.lo = 0;
+                }
+                self
+            }
         }
 
-        if rhs < 128 {
-            self.hi >>= rhs;
-        } else {
-            self.hi = 0;
-        }
+        impl ops::Shr<u32> for $ty {
+            type Output = Self;
 
-        self
-    }
+            fn shr(mut self, rhs: u32) -> Self::Output {
+                debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
+
+                let half_bits = Self::BITS / 2;
+                let low_mask = half_bits - 1;
+                let s = rhs & low_mask; // shift distance within one limb
+
+                let lo = self.lo;
+                let hi = self.hi;
+
+                self.hi = hi >> s; // arithmetic shift when `hi` is `i128` (`i256`), logical when `u128` (`u256`)
+
+                #[allow(unused_comparisons)] // `hi < 0` below is trivially false in the unsigned (`u256`) expansion
+                if rhs & half_bits == 0 { // in-range `rhs` is below `half_bits`: bits carry from `hi` into `lo`
+                    self.lo = (hi << (low_mask ^ s) << 1) as _; // `hi << (half_bits - s)` in two steps, so `s == 0` never shifts by a full limb
+                    self.lo |= lo >> s;
+                } else { // `rhs` is at least `half_bits`: the shifted high limb becomes the low limb
+                    self.lo = self.hi as _; // `self.hi` already holds `hi >> s`
+                    self.hi = if hi < 0 { !0 } else { 0 }; // fill with the sign: all ones if negative, else zero
+                }
+                self
+            }
+        }
+    };
 }
 
+impl_common!(i256);
+impl_common!(u256);
+
 impl HInt for u128 {
     type D = u256;
 
@@ -200,7 +226,7 @@ impl HInt for u128 {
     }
 
     fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
+        u256 { lo: 0, hi: self }
     }
 }
 
@@ -208,11 +234,10 @@ impl HInt for i128 {
     type D = i256;
 
     fn widen(self) -> Self::D {
-        let mut ret = self.unsigned().zero_widen().signed();
-        if self.is_negative() {
-            ret.hi = u128::MAX;
+        i256 {
+            lo: self as u128,
+            hi: if self < 0 { -1 } else { 0 },
         }
-        ret
     }
 
     fn zero_widen(self) -> Self::D {
@@ -228,7 +253,7 @@ impl HInt for i128 {
     }
 
     fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
+        i256 { lo: 0, hi: self }
     }
 }
 
@@ -252,6 +277,6 @@ impl DInt for i256 {
     }
 
     fn hi(self) -> Self::H {
-        self.hi as i128
+        self.hi
     }
 }
diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs
index d2010f021..d54706c72 100644
--- a/libm/src/math/support/big/tests.rs
+++ b/libm/src/math/support/big/tests.rs
@@ -36,7 +36,7 @@ fn widen_i128() {
         (LOHI_SPLIT as i128).widen(),
         i256 {
             lo: LOHI_SPLIT,
-            hi: u128::MAX
+            hi: -1,
         }
     );
     assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
@@ -275,3 +275,64 @@ fn shr_u256_overflow() {
     assert_eq!(u256::MAX >> 257, u256::ZERO);
     assert_eq!(u256::MAX >> u32::MAX, u256::ZERO);
 }
+
+#[test]
+fn u256_ord() {
+    let _1 = u256::ONE;
+    let _2 = _1 + _1;
+    for x in u8::MIN..u8::MAX {
+        let y = x + 1;
+        let wx = (x as u128).widen_hi();
+        let wy = (y as u128).widen_hi();
+        assert!([wx, wx + _1, wx + _2, wy, wy + _1, wy + _2].is_sorted());
+    }
+}
+#[test]
+fn i256_ord() {
+    let _1 = i256::ONE;
+    let _2 = _1 + _1;
+    for x in i8::MIN..i8::MAX {
+        let y = x + 1;
+        let wx = (x as i128).widen_hi();
+        let wy = (y as i128).widen_hi();
+        assert!([wx, wx + _1, wx + _2, wy - _2, wy - _1, wy].is_sorted());
+    }
+}
+
+#[test]
+fn u256_shifts() {
+    let _1 = u256::ONE;
+    for k in 0..255 {
+        let x = _1 << k;
+        let x2 = _1 << (k + 1);
+        assert!(x < x2);
+        assert_eq!(x << 1, x2);
+        assert_eq!(x + x, x2);
+        assert_eq!(x >> k, _1);
+        assert_eq!(x2 >> (k + 1), _1);
+    }
+}
+#[test]
+fn i256_shifts() {
+    let _1 = i256::ONE;
+    for k in 0..254 {
+        let x = _1 << k;
+        let x2 = _1 << (k + 1);
+        assert!(x < x2);
+        assert_eq!(x << 1, x2);
+        assert_eq!(x + x, x2);
+        assert_eq!(x >> k, _1);
+        assert_eq!(x2 >> (k + 1), _1);
+    }
+
+    let min = _1 << 255;
+    assert_eq!(min, i256::MIN);
+    let mut x = min;
+    for k in 0..255 {
+        assert_eq!(x, min >> k);
+        let y = x >> 1;
+        assert_eq!(y + y, x);
+        assert!(x < y);
+        x = y;
+    }
+}
diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
index 4c866ef10..fb790e696 100644
--- a/libm/src/math/support/float_traits.rs
+++ b/libm/src/math/support/float_traits.rs
@@ -6,6 +6,7 @@ use super::int_traits::{CastFrom, Int, MinInt};
 
 /// Trait for some basic operations on floats
 // #[allow(dead_code)]
+#[allow(dead_code)] // Some constants are only used with tests
 pub trait Float:
     Copy
     + fmt::Debug
@@ -189,6 +190,15 @@ pub trait Float:
             Self::ONE.copysign(self)
         }
     }
+
+    /// Make a best-effort attempt to canonicalize the number. Note that this is allowed
+    /// to be a nop and does not always quiet sNaNs.
+    fn canonicalize(self) -> Self {
+        // FIXME: LLVM often removes this. We should determine whether we can remove the operation,
+        // or switch to something based on `llvm.canonicalize` (which has crashes,
+        // <https://github.com/llvm/llvm-project/issues/32650>).
+        self * Self::ONE
+    }
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
@@ -353,6 +363,7 @@ pub const fn f32_from_bits(bits: u32) -> f32 {
 }
 
 /// `f32::to_bits`
+#[allow(dead_code)] // workaround for false positive RUST-144060
 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f32_to_bits(x: f32) -> u32 {
     // SAFETY: POD cast with no preconditions
@@ -367,6 +378,7 @@ pub const fn f64_from_bits(bits: u64) -> f64 {
 }
 
 /// `f64::to_bits`
+#[allow(dead_code)] // workaround for false positive RUST-144060
 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f64_to_bits(x: f64) -> u64 {
     // SAFETY: POD cast with no preconditions
diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs
index 85569d98a..c8558b900 100644
--- a/libm/src/math/support/hex_float.rs
+++ b/libm/src/math/support/hex_float.rs
@@ -1,8 +1,6 @@
 //! Utilities for working with hex float formats.
 
-use core::fmt;
-
-use super::{Float, Round, Status, f32_from_bits, f64_from_bits};
+use super::{Round, Status, f32_from_bits, f64_from_bits};
 
 /// Construct a 16-bit float from hex float representation (C-style)
 #[cfg(f16_enabled)]
@@ -352,133 +350,143 @@ const fn u128_ilog2(v: u128) -> u32 {
     u128::BITS - 1 - v.leading_zeros()
 }
 
-/// Format a floating point number as its IEEE hex (`%a`) representation.
-pub struct Hexf<F>(pub F);
+#[cfg(any(test, feature = "unstable-public-internals"))]
+mod hex_fmt {
+    use core::fmt;
 
-// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
-#[cfg(not(feature = "compiler-builtins"))]
-fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    if x.is_sign_negative() {
-        write!(f, "-")?;
-    }
+    use crate::support::Float;
 
-    if x.is_nan() {
-        return write!(f, "NaN");
-    } else if x.is_infinite() {
-        return write!(f, "inf");
-    } else if *x == F::ZERO {
-        return write!(f, "0x0p+0");
-    }
+    /// Format a floating point number as its IEEE hex (`%a`) representation.
+    pub struct Hexf<F>(pub F);
 
-    let mut exponent = x.exp_unbiased();
-    let sig = x.to_bits() & F::SIG_MASK;
-
-    let bias = F::EXP_BIAS as i32;
-    // The mantissa MSB needs to be shifted up to the nearest nibble.
-    let mshift = (4 - (F::SIG_BITS % 4)) % 4;
-    let sig = sig << mshift;
-    // The width is rounded up to the nearest char (4 bits)
-    let mwidth = (F::SIG_BITS as usize + 3) / 4;
-    let leading = if exponent == -bias {
-        // subnormal number means we shift our output by 1 bit.
-        exponent += 1;
-        "0."
-    } else {
-        "1."
-    };
+    // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
+    #[cfg(not(feature = "compiler-builtins"))]
+    pub(super) fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if x.is_sign_negative() {
+            write!(f, "-")?;
+        }
 
-    write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
-}
+        if x.is_nan() {
+            return write!(f, "NaN");
+        } else if x.is_infinite() {
+            return write!(f, "inf");
+        } else if *x == F::ZERO {
+            return write!(f, "0x0p+0");
+        }
 
-#[cfg(feature = "compiler-builtins")]
-fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    unimplemented!()
-}
+        let mut exponent = x.exp_unbiased();
+        let sig = x.to_bits() & F::SIG_MASK;
+
+        let bias = F::EXP_BIAS as i32;
+        // The mantissa MSB needs to be shifted up to the nearest nibble.
+        let mshift = (4 - (F::SIG_BITS % 4)) % 4;
+        let sig = sig << mshift;
+        // The width is rounded up to the nearest char (4 bits)
+        let mwidth = (F::SIG_BITS as usize + 3) / 4;
+        let leading = if exponent == -bias {
+            // subnormal number means we shift our output by 1 bit.
+            exponent += 1;
+            "0."
+        } else {
+            "1."
+        };
 
-impl<F: Float> fmt::LowerHex for Hexf<F> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt_any_hex(&self.0, f)
+        write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
+    }
+
+    #[cfg(feature = "compiler-builtins")]
+    pub(super) fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        unimplemented!()
+    }
+
+    impl<F: Float> fmt::LowerHex for Hexf<F> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt_any_hex(&self.0, f)
+                }
             }
         }
     }
-}
 
-impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+    impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+                }
             }
         }
     }
-}
 
-impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+    impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+                }
             }
         }
     }
-}
 
-impl fmt::LowerHex for Hexf<i32> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(&self.0, f)
+    impl fmt::LowerHex for Hexf<i32> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(&self.0, f)
+                }
             }
         }
     }
-}
 
-impl<T> fmt::Debug for Hexf<T>
-where
-    Hexf<T>: fmt::LowerHex,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(self, f)
+    impl<T> fmt::Debug for Hexf<T>
+    where
+        Hexf<T>: fmt::LowerHex,
+    {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(self, f)
+                }
             }
         }
     }
-}
 
-impl<T> fmt::Display for Hexf<T>
-where
-    Hexf<T>: fmt::LowerHex,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(self, f)
+    impl<T> fmt::Display for Hexf<T>
+    where
+        Hexf<T>: fmt::LowerHex,
+    {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(self, f)
+                }
             }
         }
     }
 }
 
+#[cfg(any(test, feature = "unstable-public-internals"))]
+pub use hex_fmt::*;
+
 #[cfg(test)]
 mod parse_tests {
     extern crate std;
@@ -1064,6 +1072,7 @@ mod print_tests {
     use std::string::ToString;
 
     use super::*;
+    use crate::support::Float;
 
     #[test]
     #[cfg(f16_enabled)]
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index 716af748a..9d8826dfe 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -1,6 +1,7 @@
 use core::{cmp, fmt, ops};
 
 /// Minimal integer implementations needed on all integer types, including wide integers.
+#[allow(dead_code)] // Some constants are only used with tests
 pub trait MinInt:
     Copy
     + fmt::Debug
@@ -36,8 +37,6 @@ pub trait Int:
     + fmt::Display
     + fmt::Binary
     + fmt::LowerHex
-    + PartialEq
-    + PartialOrd
     + ops::AddAssign
     + ops::SubAssign
     + ops::MulAssign
@@ -101,7 +100,10 @@ pub trait Int:
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
     fn overflowing_sub(self, other: Self) -> (Self, bool);
+    fn carrying_add(self, other: Self, carry: bool) -> (Self, bool);
+    fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool);
     fn leading_zeros(self) -> u32;
+    fn trailing_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
 
@@ -167,12 +169,30 @@ macro_rules! int_impl_common {
             <Self>::leading_zeros(self)
         }
 
+        fn trailing_zeros(self) -> u32 {
+            <Self>::trailing_zeros(self)
+        }
+
         fn ilog2(self) -> u32 {
             // On our older MSRV, this resolves to the trait method. Which won't actually work,
             // but this is only called behind other gates.
             #[allow(clippy::incompatible_msrv)]
             <Self>::ilog2(self)
         }
+
+        fn carrying_add(self, other: Self, carry: bool) -> (Self, bool) {
+            let (ab, of1) = self.overflowing_add(other);
+            let (abc, of2) = ab.overflowing_add(Self::from_bool(carry));
+            // `of1 && of2` is possible with signed integers if a negative sum
+            // overflows to `MAX` and adding the carry overflows again back to `MIN`
+            (abc, of1 ^ of2)
+        }
+
+        fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool) {
+            let (ab, of1) = self.overflowing_sub(other);
+            let (abc, of2) = ab.overflowing_sub(Self::from_bool(borrow));
+            (abc, of1 ^ of2)
+        }
     };
 }
 
diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs
index 0b72db0e4..550d2e92e 100644
--- a/libm/src/math/support/macros.rs
+++ b/libm/src/math/support/macros.rs
@@ -137,16 +137,18 @@ macro_rules! hf128 {
 #[cfg(test)]
 macro_rules! assert_biteq {
     ($left:expr, $right:expr, $($tt:tt)*) => {{
-        use $crate::support::Int;
         let l = $left;
         let r = $right;
-        let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value
+        // hack to get width from a value
+        let bits = $crate::support::Int::leading_zeros(l.to_bits() - l.to_bits());
         assert!(
-            l.biteq(r),
-            "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
+            $crate::support::Float::biteq(l, r),
+            "{}\nl: {l:?} ({lb:#0width$x} {lh})\nr: {r:?} ({rb:#0width$x} {rh})",
             format_args!($($tt)*),
             lb = l.to_bits(),
+            lh = $crate::support::Hexf(l),
             rb = r.to_bits(),
+            rh = $crate::support::Hexf(r),
             width = ((bits / 4) + 2) as usize,
 
         );
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index 2771cfd32..b2d7bd8d5 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -11,12 +11,15 @@ mod int_traits;
 
 #[allow(unused_imports)]
 pub use big::{i256, u256};
-#[allow(unused_imports)]
+// Clippy seems to have a false positive
+#[allow(unused_imports, clippy::single_component_path_imports)]
 pub(crate) use cfg_if;
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
+#[cfg(any(test, feature = "unstable-public-internals"))]
+pub use hex_float::Hexf;
 #[cfg(f16_enabled)]
 #[allow(unused_imports)]
 pub use hex_float::hf16;
@@ -24,7 +27,7 @@ pub use hex_float::hf16;
 #[allow(unused_imports)]
 pub use hex_float::hf128;
 #[allow(unused_imports)]
-pub use hex_float::{Hexf, hf32, hf64};
+pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
 
 /// Hint to the compiler that the current path is cold.
diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs
index a072bdec5..79c1bad56 100644
--- a/libm/src/math/tan.rs
+++ b/libm/src/math/tan.rs
@@ -43,7 +43,7 @@ use super::{k_tan, rem_pio2};
 /// The tangent of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tan(x: f64) -> f64 {
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs
index 8bcf9581f..a615573d8 100644
--- a/libm/src/math/tanf.rs
+++ b/libm/src/math/tanf.rs
@@ -27,7 +27,7 @@ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The tangent of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs
index cc0abe4fc..c99cc2a70 100644
--- a/libm/src/math/tanh.rs
+++ b/libm/src/math/tanh.rs
@@ -8,7 +8,7 @@ use super::expm1;
 /// The hyperbolic tangent of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanh(mut x: f64) -> f64 {
     let mut uf: f64 = x;
     let mut ui: u64 = f64::to_bits(uf);
diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs
index fffbba6c6..3cbd5917f 100644
--- a/libm/src/math/tanhf.rs
+++ b/libm/src/math/tanhf.rs
@@ -3,7 +3,7 @@ use super::expm1f;
 /// The hyperbolic tangent of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanhf(mut x: f32) -> f32 {
     /* x = |x| */
     let mut ix = x.to_bits();
diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs
index 305986064..41415d9d1 100644
--- a/libm/src/math/tgamma.rs
+++ b/libm/src/math/tgamma.rs
@@ -131,7 +131,7 @@ fn s(x: f64) -> f64 {
 }
 
 /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tgamma(mut x: f64) -> f64 {
     let u: u64 = x.to_bits();
     let absx: f64;
diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs
index fe178f7a3..a63a2a318 100644
--- a/libm/src/math/tgammaf.rs
+++ b/libm/src/math/tgammaf.rs
@@ -1,7 +1,7 @@
 use super::tgamma;
 
 /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tgammaf(x: f32) -> f32 {
     tgamma(x as f64) as f32
 }
diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs
index fa50d55e1..20d52a111 100644
--- a/libm/src/math/trunc.rs
+++ b/libm/src/math/trunc.rs
@@ -2,7 +2,7 @@
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf16(x: f16) -> f16 {
     super::generic::trunc(x)
 }
@@ -10,7 +10,7 @@ pub fn truncf16(x: f16) -> f16 {
 /// Rounds the number toward 0 to the closest integral value (f32).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf(x: f32) -> f32 {
     select_implementation! {
         name: truncf,
@@ -24,7 +24,7 @@ pub fn truncf(x: f32) -> f32 {
 /// Rounds the number toward 0 to the closest integral value (f64).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn trunc(x: f64) -> f64 {
     select_implementation! {
         name: trunc,
@@ -39,7 +39,7 @@ pub fn trunc(x: f64) -> f64 {
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf128(x: f128) -> f128 {
     super::generic::trunc(x)
 }
diff --git a/rust-version b/rust-version
new file mode 100644
index 000000000..a4db05a87
--- /dev/null
+++ b/rust-version
@@ -0,0 +1 @@
+82310651b93a594a3fd69015e1562186a080d94c
diff --git a/thumbv6m-linux-eabi.json b/thumbv6m-linux-eabi.json
deleted file mode 100644
index ac736eae6..000000000
--- a/thumbv6m-linux-eabi.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "features": "+strict-align",
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv6m-none-eabi",
-    "max-atomic-width": 0,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7em-linux-eabi.json b/thumbv7em-linux-eabi.json
deleted file mode 100644
index b6d4a6bda..000000000
--- a/thumbv7em-linux-eabi.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7em-none-eabi",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7em-linux-eabihf.json b/thumbv7em-linux-eabihf.json
deleted file mode 100644
index 81cfcd48d..000000000
--- a/thumbv7em-linux-eabihf.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "features": "+vfp4,+d16,+fp-only-sp",
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7em-none-eabihf",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7m-linux-eabi.json b/thumbv7m-linux-eabi.json
deleted file mode 100644
index abe037c5b..000000000
--- a/thumbv7m-linux-eabi.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7m-none-eabi",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/triagebot.toml b/triagebot.toml
new file mode 100644
index 000000000..eba5cdd88
--- /dev/null
+++ b/triagebot.toml
@@ -0,0 +1,21 @@
+## See <https://forge.rust-lang.org/triagebot/index.html> for documentation
+## of these features.
+
+# Warns when a PR contains merge commits
+# Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html
+[no-merges]
+exclude_titles = ["Rustc pull update"]
+
+# Canonicalize issue numbers to avoid closing the wrong issue when commits are
+# included in subtrees. Warnings about issue links in commits are turned off below.
+# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html
+[issue-links]
+check-commits = false
+
+# Prevents mentions in commits to avoid users being spammed
+# Documentation at: https://forge.rust-lang.org/triagebot/no-mentions.html
+[no-mentions]
+
+# Enable issue transfers within the org
+# Documentation at: https://forge.rust-lang.org/triagebot/transfer.html
+[transfer]