diff --git a/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs b/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
index 38fcab152aed2..01d7fb4732918 100644
--- a/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
+++ b/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
@@ -6,9 +6,6 @@
 //! which is supported on the current CPU.
 //! See for more discussion.
 //!
-//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
-//! Use the `compiler-rt` intrinsics if you want LSE support.
-//!
 //! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
 //!
 //! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
 //! We do something similar, but with macro arguments.
 
 #![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
-// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
+use core::sync::atomic::{AtomicU8, Ordering};
+
+/// Non-zero if the host supports LSE atomics.
+static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
+
+intrinsics! {
+    /// Call to enable LSE in outline atomic operations. The caller must verify
+    /// LSE operations are supported.
+    pub extern "C" fn __rust_enable_lse() {
+        HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
+    }
+}
 
 /// Translate a byte size to a Rust type.
 #[rustfmt::skip]
@@ -45,6 +53,7 @@ macro_rules! reg {
     (2, $num:literal) => { concat!("w", $num) };
     (4, $num:literal) => { concat!("w", $num) };
     (8, $num:literal) => { concat!("x", $num) };
+    (16, $num:literal) => { concat!("x", $num) };
 }
 
 /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction.
@@ -126,6 +135,41 @@ macro_rules! stxp {
     };
 }
 
+// If supported, perform the requested LSE op and return, or fall through.
+macro_rules! try_lse_op {
+    ($op:literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ]) => {
+        concat!(
+            ".arch_extension lse; ",
+            "adrp x16, {have_lse}; ",
+            "ldrb w16, [x16, :lo12:{have_lse}]; ",
+            "cbz w16, 8f; ",
+            // LSE_OP s(reg),* [$mem]
+            concat!(lse!($op, $ordering, $bytes), $(" ", reg!($bytes, $reg), ", ",)* "[", stringify!($mem), "]; "),
+            "ret; ",
+            "8:"
+        )
+    };
+}
+
+// Translate a memory ordering to the LSE suffix.
+#[rustfmt::skip]
+macro_rules! lse_mem_sfx {
+    (Relaxed) => { "" };
+    (Acquire) => { "a" };
+    (Release) => { "l" };
+    (AcqRel) => { "al" };
+}
+
+// Generate the aarch64 LSE operation for a memory ordering and width.
+macro_rules! lse {
+    ($op:literal, $order:ident, 16) => {
+        concat!($op, "p", lse_mem_sfx!($order))
+    };
+    ($op:literal, $order:ident, $bytes:tt) => {
+        concat!($op, lse_mem_sfx!($order), size!($bytes))
+    };
+}
+
 /// See .
 macro_rules! compare_and_swap {
     ($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,7 +181,9 @@ macro_rules! compare_and_swap {
             ) -> int_ty!($bytes) {
                 // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                 core::arch::naked_asm! {
-                    // UXT s(tmp0), s(0)
+                    // CAS s(0), s(1), [x2]; if LSE supported.
+                    try_lse_op!("cas", $ordering, $bytes, 0, 1, [x2]),
+                    // UXT s(tmp0), s(0)
                     concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
                     // LDXR s(0), [x2]
                     concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"),
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
                     "cbnz w17, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 {
                 expected: i128, desired: i128, ptr: *mut i128
             ) -> i128 {
                 core::arch::naked_asm! {
+                    // CASP x0, x1, x2, x3, [x4]; if LSE supported.
+                    try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]),
                     "mov x16, x0",
                     "mov x17, x1",
                     "0:",
@@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 {
                     "cbnz w15, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -195,6 +245,8 @@ macro_rules! swap {
                 left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    // SWP s(0), s(0), [x1]; if LSE supported.
+                    try_lse_op!("swp", $ordering, $bytes, 0, 0, [x1]),
                     // mov s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -204,6 +256,7 @@ macro_rules! swap {
                     concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                     "cbnz w17, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -212,7 +265,7 @@ macro_rules! swap {
 
 /// See (e.g.) .
 macro_rules! fetch_op {
-    ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
+    ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
         intrinsics! {
             #[maybe_use_optimized_c_shim]
             #[unsafe(naked)]
@@ -220,6 +273,8 @@ macro_rules! fetch_op {
                 val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    // LSEOP s(0), s(0), [x1]; if LSE supported.
+                    try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [x1]),
                     // mov s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -231,6 +286,7 @@ macro_rules! fetch_op {
                     concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                     "cbnz w15, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -240,25 +296,25 @@ macro_rules! fetch_op {
 // We need a single macro to pass to `foreach_ldadd`.
 macro_rules! add {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "add" }
+        fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
     };
 }
 
 macro_rules! and {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "bic" }
+        fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
     };
 }
 
 macro_rules! xor {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "eor" }
+        fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
     };
 }
 
 macro_rules! or {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "orr" }
+        fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
     };
 }
diff --git a/library/std/src/sys/configure_builtins.rs b/library/std/src/sys/configure_builtins.rs
new file mode 100644
index 0000000000000..9d776b778dcbe
--- /dev/null
+++ b/library/std/src/sys/configure_builtins.rs
@@ -0,0 +1,22 @@
+/// Hook into .init_array to enable LSE atomic operations at startup, if
+/// supported.
+#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "compiler-builtins-c")))]
+#[used]
+#[unsafe(link_section = ".init_array.90")]
+static RUST_LSE_INIT: extern "C" fn() = {
+    extern "C" fn init_lse() {
+        use crate::arch;
+
+        // This is provided by compiler-builtins::aarch64_linux.
+        unsafe extern "C" {
+            fn __rust_enable_lse();
+        }
+
+        if arch::is_aarch64_feature_detected!("lse") {
+            unsafe {
+                __rust_enable_lse();
+            }
+        }
+    }
+    init_lse
+};
diff --git a/library/std/src/sys/mod.rs b/library/std/src/sys/mod.rs
index f9a02b522e5e1..8ec0a0e33023f 100644
--- a/library/std/src/sys/mod.rs
+++ b/library/std/src/sys/mod.rs
@@ -1,5 +1,10 @@
 #![allow(unsafe_op_in_unsafe_fn)]
 
+/// The `configure_builtins` module provides runtime support for
+/// compiler-builtins features which require dynamic initialization to work
+/// as expected, e.g. aarch64 outline-atomics.
+mod configure_builtins;
+
 /// The PAL (platform abstraction layer) contains platform-specific abstractions
 /// for implementing the features in the other submodules, e.g. UNIX file
 /// descriptors.
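To make the dispatch concrete: below is a hand-expanded sketch of what `compare_and_swap!` produces for the 4-byte acquire CAS once `try_lse_op!("cas", Acquire, 4, 0, 1, [x2])` is spliced in front of the LL/SC loop. The function and static names here are illustrative stand-ins, not the symbols the patch emits; the instruction sequence follows the macros above (aarch64-linux only). When the flag is set, the function executes a single `casa` and returns without ever touching the exclusive monitor.

```rust
use core::sync::atomic::AtomicU8;

// Illustrative stand-in for compiler-builtins' HAVE_LSE_ATOMICS flag.
static HAVE_LSE: AtomicU8 = AtomicU8::new(0);

// Sketch of the expanded __aarch64_cas4_acq (symbol name illustrative).
#[unsafe(naked)]
pub unsafe extern "C" fn cas4_acq_sketch(expected: u32, desired: u32, ptr: *mut u32) -> u32 {
    core::arch::naked_asm! {
        // try_lse_op! prologue: test the flag, take the LSE path if set.
        ".arch_extension lse",
        "adrp x16, {have_lse}",              // page address of the flag
        "ldrb w16, [x16, :lo12:{have_lse}]", // load the flag byte
        "cbz w16, 8f",                       // flag clear -> LL/SC fallback
        "casa w0, w1, [x2]",                 // lse!("cas", Acquire, 4) = "casa"
        "ret",
        "8:",
        // Original LL/SC loop, unchanged from before the patch.
        "mov w16, w0",                       // uxt! is a plain mov for 4 bytes
        "0:",
        "ldaxr w0, [x2]",                    // load-exclusive, acquire
        "cmp w0, w16",
        "bne 1f",
        "stxr w17, w1, [x2]",                // store-exclusive
        "cbnz w17, 0b",                      // retry if the monitor was lost
        "1:",
        "ret",
        have_lse = sym HAVE_LSE,
    }
}
```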
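The mnemonic scheme that `lse!` and `lse_mem_sfx!` implement composes three pieces: the base op, the ordering suffix, and a width suffix, with 16-byte operations taking a `p` (pair) infix instead of a width suffix. A standalone copy of the suffix macro (duplicated here for illustration, runnable on any host) sanity-checks the composition:

```rust
// Standalone copy of lse_mem_sfx! from the patch, for illustration.
macro_rules! lse_mem_sfx {
    (Relaxed) => { "" };
    (Acquire) => { "a" };
    (Release) => { "l" };
    (AcqRel) => { "al" };
}

fn main() {
    // base op + ordering suffix + width suffix ("b", "h", or "" per size!):
    assert_eq!(concat!("cas", lse_mem_sfx!(AcqRel), "b"), "casalb"); // 1-byte CAS, acq_rel
    assert_eq!(concat!("swp", lse_mem_sfx!(Acquire), "h"), "swpah"); // 2-byte swap, acquire
    assert_eq!(concat!("ldadd", lse_mem_sfx!(Release)), "ldaddl"); // 4/8-byte fetch-add, release
    // 16-byte ops splice in "p" before the ordering suffix instead:
    assert_eq!(concat!("cas", "p", lse_mem_sfx!(AcqRel)), "caspal"); // CASP, acq_rel
    println!("all LSE mnemonics check out");
}
```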
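std wires the hook up through the new `.init_array.90` entry: numbered `.init_array.N` sections run before the unnumbered default-priority constructors, so the flag is already set by the time user constructors can touch atomics. A runtime that does not go through std's hook could, in principle, drive the intrinsic itself. The sketch below is an assumption-laden illustration: it declares its own `getauxval` and `__rust_enable_lse` bindings (whether the latter is reachable depends on how compiler-builtins is linked), and takes the `AT_HWCAP`/`HWCAP_ATOMICS` values from the Linux uAPI headers.

```rust
use core::ffi::c_ulong;

unsafe extern "C" {
    fn getauxval(type_: c_ulong) -> c_ulong; // from libc
    fn __rust_enable_lse(); // from compiler-builtins::aarch64_linux
}

const AT_HWCAP: c_ulong = 16;
const HWCAP_ATOMICS: c_ulong = 1 << 8; // FEAT_LSE bit on aarch64

/// Enable the LSE fast path if (and only if) the CPU supports it.
fn enable_lse_if_supported() {
    // SAFETY: getauxval has no preconditions; AT_HWCAP is always valid.
    if unsafe { getauxval(AT_HWCAP) } & HWCAP_ATOMICS != 0 {
        // SAFETY: __rust_enable_lse requires the caller to have verified
        // LSE support, which the HWCAP bit just established.
        unsafe { __rust_enable_lse() };
    }
}
```

This mirrors what `configure_builtins.rs` does with `is_aarch64_feature_detected!("lse")`. It also suggests why the hook is gated on `not(feature = "compiler-builtins-c")`: with the C shims enabled, dispatch presumably goes through compiler-rt's own `__aarch64_have_lse_atomics` constructor, mentioned in the doc comment this patch removes.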