-
Notifications
You must be signed in to change notification settings - Fork 13.6k
compiler-builtins: plumb LSE support for aarch64 on linux/gnu when optimized-compiler-builtins not enabled #144705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,6 @@ | |
//! which is supported on the current CPU. | ||
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion. | ||
//! | ||
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. | ||
//! Use the `compiler-rt` intrinsics if you want LSE support. | ||
//! | ||
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt. | ||
//! | ||
//! Generate functions for each of the following symbols: | ||
|
@@ -24,7 +21,18 @@ | |
//! We do something similar, but with macro arguments. | ||
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule | ||
|
||
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. | ||
use core::sync::atomic::{AtomicU8, Ordering}; | ||
|
||
/// non-zero if the host supports LSE atomics. | ||
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0); | ||
|
||
intrinsics! { | ||
/// Call to enable LSE in outline atomic operations. The caller must verify | ||
/// LSE operations are supported. | ||
pub extern "C" fn __rust_enable_lse() { | ||
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed); | ||
} | ||
} | ||
|
||
/// Translate a byte size to a Rust type. | ||
#[rustfmt::skip] | ||
|
@@ -126,6 +134,39 @@ macro_rules! stxp { | |
}; | ||
} | ||
|
||
// Check if LSE intrinsic can be used, and jump to label if not. | ||
macro_rules! jmp_if_no_lse { | ||
($label:literal) => { | ||
concat!( | ||
".arch_extension lse; ", | ||
"adrp x16, {have_lse}; ", | ||
"ldrb w16, [x16, :lo12:{have_lse}]; ", | ||
"cbz w16, ", | ||
$label, | ||
";" | ||
) | ||
}; | ||
} | ||
|
||
// Translate memory ordering to the LSE suffix | ||
#[rustfmt::skip] | ||
macro_rules! lse_mem_sfx { | ||
(Relaxed) => { "" }; | ||
(Acquire) => { "a" }; | ||
(Release) => { "l" }; | ||
(AcqRel) => { "al" }; | ||
} | ||
|
||
// Generate the aarch64 LSE operation for memory ordering and width | ||
macro_rules! lse { | ||
($op:literal, $order:ident, 16) => { | ||
concat!($op, "p", lse_mem_sfx!($order)) | ||
}; | ||
($op:literal, $order:ident, $bytes:tt) => { | ||
concat!($op, lse_mem_sfx!($order), size!($bytes)) | ||
}; | ||
} | ||
|
||
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>. | ||
macro_rules! compare_and_swap { | ||
($ordering:ident, $bytes:tt, $name:ident) => { | ||
|
@@ -137,6 +178,11 @@ macro_rules! compare_and_swap { | |
) -> int_ty!($bytes) { | ||
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. | ||
core::arch::naked_asm! { | ||
jmp_if_no_lse!("8f"), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be better if this macro instead took the LSE instruction as an argument and included the |
||
// CAS s(0), s(1), [x2] | ||
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"), | ||
"ret", | ||
"8:", | ||
// UXT s(tmp0), s(0) | ||
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), | ||
"0:", | ||
|
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap { | |
"cbnz w17, 0b", | ||
"1:", | ||
"ret", | ||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, | ||
} | ||
} | ||
} | ||
|
@@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 { | |
expected: i128, desired: i128, ptr: *mut i128 | ||
) -> i128 { | ||
core::arch::naked_asm! { | ||
jmp_if_no_lse!("8f"), | ||
// CASP x0, x1, x2, x3, [x4] | ||
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"), | ||
"ret", | ||
"8:", | ||
"mov x16, x0", | ||
"mov x17, x1", | ||
"0:", | ||
|
@@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 { | |
"cbnz w15, 0b", | ||
"1:", | ||
"ret", | ||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, | ||
} | ||
} | ||
} | ||
|
@@ -195,6 +248,11 @@ macro_rules! swap { | |
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) | ||
) -> int_ty!($bytes) { | ||
core::arch::naked_asm! { | ||
jmp_if_no_lse!("8f"), | ||
// SWP s(0), s(0), [x1] | ||
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"), | ||
"ret", | ||
"8:", | ||
// mov s(tmp0), s(0) | ||
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), | ||
"0:", | ||
|
@@ -204,6 +262,7 @@ macro_rules! swap { | |
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), | ||
"cbnz w17, 0b", | ||
"ret", | ||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, | ||
} | ||
} | ||
} | ||
|
@@ -212,14 +271,19 @@ macro_rules! swap { | |
|
||
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>. | ||
macro_rules! fetch_op { | ||
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { | ||
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => { | ||
intrinsics! { | ||
#[maybe_use_optimized_c_shim] | ||
#[unsafe(naked)] | ||
pub unsafe extern "C" fn $name ( | ||
val: int_ty!($bytes), ptr: *mut int_ty!($bytes) | ||
) -> int_ty!($bytes) { | ||
core::arch::naked_asm! { | ||
jmp_if_no_lse!("8f"), | ||
// LSEOP s(0), s(0), [x1] | ||
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"), | ||
"ret", | ||
"8:", | ||
// mov s(tmp0), s(0) | ||
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), | ||
"0:", | ||
|
@@ -231,6 +295,7 @@ macro_rules! fetch_op { | |
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), | ||
"cbnz w15, 0b", | ||
"ret", | ||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, | ||
} | ||
} | ||
} | ||
|
@@ -240,25 +305,25 @@ macro_rules! fetch_op { | |
// We need a single macro to pass to `foreach_ldadd`. | ||
macro_rules! add { | ||
($ordering:ident, $bytes:tt, $name:ident) => { | ||
fetch_op! { $ordering, $bytes, $name, "add" } | ||
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" } | ||
}; | ||
} | ||
|
||
macro_rules! and { | ||
($ordering:ident, $bytes:tt, $name:ident) => { | ||
fetch_op! { $ordering, $bytes, $name, "bic" } | ||
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" } | ||
}; | ||
} | ||
|
||
macro_rules! xor { | ||
($ordering:ident, $bytes:tt, $name:ident) => { | ||
fetch_op! { $ordering, $bytes, $name, "eor" } | ||
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" } | ||
}; | ||
} | ||
|
||
macro_rules! or { | ||
($ordering:ident, $bytes:tt, $name:ident) => { | ||
fetch_op! { $ordering, $bytes, $name, "orr" } | ||
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" } | ||
}; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/// Hook into .init_array to enable LSE atomic operations at startup, if | ||
/// supported. | ||
#[cfg(all( | ||
target_arch = "aarch64", | ||
target_os = "linux", | ||
any(target_env = "gnu", target_env = "musl"), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need to check `target_env` at all here. Any Linux target should be fine. |
||
not(feature = "compiler-builtins-c") | ||
))] | ||
#[used] | ||
#[unsafe(link_section = ".init_array.90")] | ||
static RUST_LSE_INIT: extern "C" fn() = { | ||
extern "C" fn init_lse() { | ||
use crate::arch; | ||
|
||
// This is provided by compiler-builtins::aarch64_linux. | ||
unsafe extern "C" { | ||
fn __rust_enable_lse(); | ||
} | ||
|
||
if arch::is_aarch64_feature_detected!("lse") { | ||
unsafe { | ||
__rust_enable_lse(); | ||
} | ||
} | ||
} | ||
init_lse | ||
}; |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably okay to call this directly in `std`, but all builtins intrinsics are called by `extern`, so it's probably safest to do the same here (the `std` dependency should actually be removed in 42bf044). The `intrinsics!` macro handles `no_mangle` and weak linkage.

Any reason this is a draft still? Assuming you have tested that the current setup works, it LGTM with the above change (though I'll ask Amanieu to take a look)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I'll update the usage. I'll take this out of draft when I push the update.