compiler-builtins: plumb LSE support for aarch64 on linux/gnu when optimized-compiler-builtins not enabled #144705

Open · wants to merge 2 commits into base: master
83 changes: 74 additions & 9 deletions library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
@@ -6,9 +6,6 @@
//! which is supported on the current CPU.
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
//!
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
//! Use the `compiler-rt` intrinsics if you want LSE support.
//!
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
//!
//! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
//! We do something similar, but with macro arguments.
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule

// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
use core::sync::atomic::{AtomicU8, Ordering};

/// Non-zero if the host supports LSE atomics.
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);

intrinsics! {
/// Call to enable LSE in outline atomic operations. The caller must verify
/// LSE operations are supported.
pub extern "C" fn __rust_enable_lse() {
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
}
}
@tgross35 (Contributor) commented on Jul 31, 2025:
It's probably okay to call this directly in std, but all builtins intrinsics are called via `extern`, so it's probably safest to do the same here (the std dependency should actually be removed in 42bf044). The `intrinsics!` macro handles `no_mangle` and weak linkage:

intrinsics! {
    pub extern "C" fn __rust_enable_lse() {
        // ...
    }
}

Any reason this is a draft still? Assuming you have tested that the current setup works, it lgtm with the above change (though I'll ask Amanieu to take a look).

The author replied:

Ok, I'll update the usage. I'll take this out of draft when I push the update.
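
One property worth noting about the flag itself: `Relaxed` ordering suffices for `HAVE_LSE_ATOMICS` because it is written once during startup, from an `.init_array` constructor that runs before user threads can exist, and any reader that observed a stale zero would simply take the still-correct LL/SC path.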


/// Translate a byte size to a Rust type.
#[rustfmt::skip]
@@ -126,6 +134,39 @@ macro_rules! stxp {
};
}

// Check whether the LSE instructions can be used, and jump to the given label if not.
macro_rules! jmp_if_no_lse {
($label:literal) => {
concat!(
".arch_extension lse; ",
"adrp x16, {have_lse}; ",
"ldrb w16, [x16, :lo12:{have_lse}]; ",
"cbz w16, ",
$label,
";"
)
};
}
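
For reference, `jmp_if_no_lse!("8f")` expands to roughly the following fragment (a sketch of the concatenated asm string; `{have_lse}` is bound with a `sym` operand at each use site, and x16 is the AAPCS64 ip0 scratch register):

.arch_extension lse
adrp x16, {have_lse}                // page address of HAVE_LSE_ATOMICS
ldrb w16, [x16, :lo12:{have_lse}]   // load the one-byte flag
cbz  w16, 8f                        // zero means no LSE: take the LL/SC path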

// Translate memory ordering to the LSE suffix
#[rustfmt::skip]
macro_rules! lse_mem_sfx {
(Relaxed) => { "" };
(Acquire) => { "a" };
(Release) => { "l" };
(AcqRel) => { "al" };
}

// Generate the aarch64 LSE operation for memory ordering and width
macro_rules! lse {
($op:literal, $order:ident, 16) => {
concat!($op, "p", lse_mem_sfx!($order))
};
($op:literal, $order:ident, $bytes:tt) => {
concat!($op, lse_mem_sfx!($order), size!($bytes))
};
}
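
Concretely, assuming `size!` maps 1, 2, 4, 8 to "b", "h", "", "" as in the existing LL/SC macros: `lse!("cas", Acquire, 1)` yields `casab`, `lse!("swp", AcqRel, 4)` yields `swpal`, and the 16-byte arm gives `lse!("cas", Release, 16)` = `caspl`.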

/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
macro_rules! compare_and_swap {
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,6 +178,11 @@
) -> int_ty!($bytes) {
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
A reviewer (Member) commented:

It would be better if this macro instead took the LSE instruction as an argument and included the `ret` and the `8:` label. This would reduce the code here to a single line.
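
A minimal sketch of that suggestion (hypothetical macro, not part of this PR; it leans on `concat!` expanding nested macro calls, which the existing code already relies on):

macro_rules! lse_or_fallback {
    // Takes the full LSE instruction as tokens; emits the guard, the
    // instruction, `ret`, and the `8:` fallback label in one expansion.
    ($($insn:tt)+) => {
        concat!(
            jmp_if_no_lse!("8f"),
            $($insn)+, "; ",
            "ret; ",
            "8:"
        )
    };
}

Each naked_asm! body below would then open with a single line such as `lse_or_fallback!(concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"))` in place of the guard/instruction/ret/label sequence.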

// CAS s(0), s(1), [x2]
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
"ret",
"8:",
// UXT s(tmp0), s(0)
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
@@ -150,6 +196,7 @@
"cbnz w17, 0b",
"1:",
"ret",
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
}
}
}
@@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 {
expected: i128, desired: i128, ptr: *mut i128
) -> i128 {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// CASP x0, x1, x2, x3, [x4]
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
"ret",
"8:",
"mov x16, x0",
"mov x17, x1",
"0:",
@@ -179,6 +231,7 @@
"cbnz w15, 0b",
"1:",
"ret",
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
}
}
}
@@ -195,6 +248,11 @@ macro_rules! swap {
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// SWP s(0), s(0), [x1]
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
"ret",
"8:",
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
@@ -204,6 +262,7 @@
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
"cbnz w17, 0b",
"ret",
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
}
}
}
@@ -212,14 +271,19 @@ macro_rules! swap {

/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
macro_rules! fetch_op {
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// LSEOP s(0), s(0), [x1]
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
"ret",
"8:",
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
@@ -231,6 +295,7 @@
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
"cbnz w15, 0b",
"ret",
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
}
}
}
@@ -240,25 +305,25 @@
// We need a single macro to pass to `foreach_ldadd`.
macro_rules! add {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "add" }
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
};
}

macro_rules! and {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "bic" }
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
};
}
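
(The pairing of `bic` with `ldclr` is intentional: `ldclr` atomically clears the bits set in its source operand, and the compiler is expected to pass the complemented mask when it calls the `__aarch64_ldclr*` intrinsics, so the LL/SC `bic` and the LSE `ldclr` both compute `old & !val` for the same argument.)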

macro_rules! xor {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "eor" }
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
};
}

macro_rules! or {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "orr" }
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
};
}

27 changes: 27 additions & 0 deletions library/std/src/sys/configure_builtins.rs
@@ -0,0 +1,27 @@
/// Hook into .init_array to enable LSE atomic operations at startup, if
/// supported.
#[cfg(all(
target_arch = "aarch64",
target_os = "linux",
any(target_env = "gnu", target_env = "musl"),
A reviewer (Member) commented:

You don't need to check `target_env` at all here; any Linux target should be fine.
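
Per that suggestion, the gate would shrink to something like this (sketch):

#[cfg(all(
    target_arch = "aarch64",
    target_os = "linux",
    not(feature = "compiler-builtins-c")
))]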

not(feature = "compiler-builtins-c")
))]
#[used]
#[unsafe(link_section = ".init_array.90")]
static RUST_LSE_INIT: extern "C" fn() = {
extern "C" fn init_lse() {
use crate::arch;

// This is provided by compiler-builtins::aarch64_linux.
unsafe extern "C" {
fn __rust_enable_lse();
}

if arch::is_aarch64_feature_detected!("lse") {
unsafe {
__rust_enable_lse();
}
}
}
init_lse
};
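
Taken together: at startup the libc runs the `.init_array` entries, `init_lse` probes for the `lse` feature and, if present, calls `__rust_enable_lse`, which sets `HAVE_LSE_ATOMICS`; every later outline-atomic call then takes the single-instruction LSE path instead of the LL/SC loop. A caller-side illustration (a sketch; the intrinsic name follows LLVM's outline-atomics naming, and the dispatch is invisible to user code):

use std::sync::atomic::{AtomicU32, Ordering};

fn main() {
    let x = AtomicU32::new(1);
    // With outline-atomics (the default for aarch64-unknown-linux-gnu), this
    // compare_exchange lowers to a call to __aarch64_cas4_acq_rel, which now
    // starts with the HAVE_LSE_ATOMICS check before falling back to LL/SC.
    let _ = x.compare_exchange(1, 2, Ordering::AcqRel, Ordering::Acquire);
}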
5 changes: 5 additions & 0 deletions library/std/src/sys/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#![allow(unsafe_op_in_unsafe_fn)]

/// The configure-builtins module provides runtime support for compiler-builtins
/// features which require dynamic initialization to work as expected, e.g.
/// aarch64 outline-atomics.
mod configure_builtins;

/// The PAL (platform abstraction layer) contains platform-specific abstractions
/// for implementing the features in the other submodules, e.g. UNIX file
/// descriptors.