Skip to content

Commit 3dd96a4

Browse files
Paul Murphy (pmur)
authored and committed
compiler-builtins: plumb LSE support for aarch64 on linux/gnu
Add dynamic support for aarch64 LSE atomic ops on linux/gnu targets when optimized-compiler-builtins is not enabled. The resulting asm should exactly match that of LLVM's compiler-rt builtins, though the symbol naming for the support function and global does not.
1 parent 2e53675 commit 3dd96a4

File tree

1 file changed

+92
-9
lines changed

1 file changed

+92
-9
lines changed

library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs

Lines changed: 92 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -6,9 +6,6 @@
66
//! which is supported on the current CPU.
77
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88
//!
9-
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10-
//! Use the `compiler-rt` intrinsics if you want LSE support.
11-
//!
129
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310
//!
1411
//! Generate functions for each of the following symbols:
@@ -24,7 +21,36 @@
2421
//! We do something similar, but with macro arguments.
2522
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
2623

27-
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+
use core::sync::atomic::{AtomicU8, Ordering};
25+
26+
/// non-zero if the host supports LSE atomics.
27+
#[cfg(not(feature = "c"))]
28+
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
29+
30+
/// outline-atomics are only enabled with glibc support, add a .init_array entry to
31+
/// check and enable LSE via getauxval. This behavior is similar to compiler rt.
32+
#[cfg(target_env = "gnu")]
33+
#[unsafe(link_section = ".init_array.90")]
34+
pub static RUST_LSE_INIT: extern "C" fn() = {
35+
extern "C" fn aarch64_rust_init_lse_atomics() {
36+
const AT_HWCAP: core::ffi::c_ulong = 16;
37+
const HWCAP_ATOMICS: core::ffi::c_ulong = 0x100;
38+
let hwcap;
39+
40+
// The most straightforward path to querying for LSE support is the host's libc.
41+
// We can't use the libc crate here, we are a dependency.
42+
unsafe extern "C" {
43+
fn getauxval(num: core::ffi::c_ulong) -> core::ffi::c_ulong;
44+
}
45+
unsafe {
46+
hwcap = getauxval(AT_HWCAP);
47+
}
48+
if hwcap & HWCAP_ATOMICS != 0 {
49+
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
50+
}
51+
}
52+
aarch64_rust_init_lse_atomics
53+
};
2854

2955
/// Translate a byte size to a Rust type.
3056
#[rustfmt::skip]
@@ -126,6 +152,39 @@ macro_rules! stxp {
126152
};
127153
}
128154

155+
// Check if LSE intrinsic can be used, and jump to label if not.
156+
macro_rules! jmp_if_no_lse {
157+
($label:literal) => {
158+
concat!(
159+
".arch_extension lse; ",
160+
"adrp x16, {have_lse}; ",
161+
"ldrb w16, [x16, :lo12:{have_lse}]; ",
162+
"cbz w16, ",
163+
$label,
164+
";"
165+
)
166+
};
167+
}
168+
169+
// Translate memory ordering to the LSE suffix
170+
#[rustfmt::skip]
171+
macro_rules! lse_mem_sfx {
172+
(Relaxed) => { "" };
173+
(Acquire) => { "a" };
174+
(Release) => { "l" };
175+
(AcqRel) => { "al" };
176+
}
177+
178+
// Generate the aarch64 LSE operation for memory ordering and width
179+
macro_rules! lse {
180+
($op:literal, $order:ident, 16) => {
181+
concat!($op, "p", lse_mem_sfx!($order))
182+
};
183+
($op:literal, $order:ident, $bytes:tt) => {
184+
concat!($op, lse_mem_sfx!($order), size!($bytes))
185+
};
186+
}
187+
129188
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130189
macro_rules! compare_and_swap {
131190
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,6 +196,11 @@ macro_rules! compare_and_swap {
137196
) -> int_ty!($bytes) {
138197
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139198
core::arch::naked_asm! {
199+
jmp_if_no_lse!("8f"),
200+
// CAS s(0), s(1), [x2]
201+
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
202+
"ret",
203+
"8:",
140204
// UXT s(tmp0), s(0)
141205
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
142206
"0:",
@@ -150,6 +214,7 @@ macro_rules! compare_and_swap {
150214
"cbnz w17, 0b",
151215
"1:",
152216
"ret",
217+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
153218
}
154219
}
155220
}
@@ -166,6 +231,11 @@ macro_rules! compare_and_swap_i128 {
166231
expected: i128, desired: i128, ptr: *mut i128
167232
) -> i128 {
168233
core::arch::naked_asm! {
234+
jmp_if_no_lse!("8f"),
235+
// CASP x0, x1, x2, x3, [x4]
236+
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
237+
"ret",
238+
"8:",
169239
"mov x16, x0",
170240
"mov x17, x1",
171241
"0:",
@@ -179,6 +249,7 @@ macro_rules! compare_and_swap_i128 {
179249
"cbnz w15, 0b",
180250
"1:",
181251
"ret",
252+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
182253
}
183254
}
184255
}
@@ -195,6 +266,11 @@ macro_rules! swap {
195266
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
196267
) -> int_ty!($bytes) {
197268
core::arch::naked_asm! {
269+
jmp_if_no_lse!("8f"),
270+
// SWP s(0), s(0), [x1]
271+
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
272+
"ret",
273+
"8:",
198274
// mov s(tmp0), s(0)
199275
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
200276
"0:",
@@ -204,6 +280,7 @@ macro_rules! swap {
204280
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
205281
"cbnz w17, 0b",
206282
"ret",
283+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
207284
}
208285
}
209286
}
@@ -212,14 +289,19 @@ macro_rules! swap {
212289

213290
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214291
macro_rules! fetch_op {
215-
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
292+
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
216293
intrinsics! {
217294
#[maybe_use_optimized_c_shim]
218295
#[unsafe(naked)]
219296
pub unsafe extern "C" fn $name (
220297
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
221298
) -> int_ty!($bytes) {
222299
core::arch::naked_asm! {
300+
jmp_if_no_lse!("8f"),
301+
// LSEOP s(0), s(0), [x1]
302+
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
303+
"ret",
304+
"8:",
223305
// mov s(tmp0), s(0)
224306
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
225307
"0:",
@@ -231,6 +313,7 @@ macro_rules! fetch_op {
231313
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
232314
"cbnz w15, 0b",
233315
"ret",
316+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
234317
}
235318
}
236319
}
@@ -240,25 +323,25 @@ macro_rules! fetch_op {
240323
// We need a single macro to pass to `foreach_ldadd`.
241324
macro_rules! add {
242325
($ordering:ident, $bytes:tt, $name:ident) => {
243-
fetch_op! { $ordering, $bytes, $name, "add" }
326+
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
244327
};
245328
}
246329

247330
macro_rules! and {
248331
($ordering:ident, $bytes:tt, $name:ident) => {
249-
fetch_op! { $ordering, $bytes, $name, "bic" }
332+
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
250333
};
251334
}
252335

253336
macro_rules! xor {
254337
($ordering:ident, $bytes:tt, $name:ident) => {
255-
fetch_op! { $ordering, $bytes, $name, "eor" }
338+
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
256339
};
257340
}
258341

259342
macro_rules! or {
260343
($ordering:ident, $bytes:tt, $name:ident) => {
261-
fetch_op! { $ordering, $bytes, $name, "orr" }
344+
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
262345
};
263346
}
264347

0 commit comments

Comments (0)