Skip to content

Commit ee0641a

Browse files
Paul Murphy (pmur)
authored and committed
compiler-builtins: plumb LSE support for aarch64 on linux/gnu
Add dynamic support for aarch64 LSE atomic ops on linux/gnu targets when optimized-compiler-builtins is not enabled. The resulting asm should exactly match that of LLVM's compiler-rt builtins, though the symbol naming for the support function and global does not.
1 parent 2e53675 commit ee0641a

File tree

1 file changed

+97
-9
lines changed

1 file changed

+97
-9
lines changed

library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//! which is supported on the current CPU.
77
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88
//!
9-
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10-
//! Use the `compiler-rt` intrinsics if you want LSE support.
11-
//!
129
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310
//!
1411
//! Generate functions for each of the following symbols:
@@ -24,7 +21,38 @@
2421
//! We do something similar, but with macro arguments.
2522
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
2623

27-
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+
/// non-zero if the host supports LSE atomics.
25+
#[cfg(not(feature = "c"))]
26+
static mut HAVE_LSE_ATOMICS: u8 = 0;
27+
28+
/// This calls into libc's getauxval to check hwcap bits for LSE support. Note, at this time
29+
/// outline-atomics are only enabled on gnu targets.
30+
#[cfg(all(not(feature = "c"), target_env = "gnu"))]
31+
extern "C" fn aarch64_rust_init_lse_atomics() {
32+
// The most straightforward path to querying for LSE support is the host's libc.
33+
// We can't use the libc crate here, we are a dependency.
34+
unsafe extern "C" {
35+
fn getauxval(num: core::ffi::c_ulong) -> core::ffi::c_ulong;
36+
}
37+
38+
const AT_HWCAP: core::ffi::c_ulong = 16;
39+
const HWCAP_ATOMICS: core::ffi::c_ulong = 0x100;
40+
41+
unsafe {
42+
let hwcap = getauxval(AT_HWCAP);
43+
if hwcap & HWCAP_ATOMICS != 0 {
44+
HAVE_LSE_ATOMICS = 1;
45+
}
46+
}
47+
}
48+
49+
/// The entry into startup array for LSE initialization. This should be placed within
50+
/// the init array similar to compiler-rt. However, if it isn't called very early, that
51+
/// should also be OK as the fallback atomic operations always work.
52+
#[used]
53+
#[unsafe(link_section = ".init_array.90")]
54+
#[cfg(all(not(feature = "c"), target_env = "gnu"))]
55+
static RUST_LSE_INIT: extern "C" fn() = aarch64_rust_init_lse_atomics;
2856

2957
/// Translate a byte size to a Rust type.
3058
#[rustfmt::skip]
@@ -126,6 +154,42 @@ macro_rules! stxp {
126154
};
127155
}
128156

157+
// Check if LSE intrinsic can be used, and jump to label if not.
158+
macro_rules! jmp_if_no_lse {
159+
($label:literal) => {
160+
concat!(
161+
".arch_extension lse; ",
162+
".macro load sym;",
163+
"adrp x16, \\sym; ",
164+
"ldrb w16, [x16, :lo12:\\sym]; ",
165+
".endm;",
166+
"load sym={};",
167+
"cbz w16, ",
168+
$label,
169+
";"
170+
)
171+
};
172+
}
173+
174+
// Translate memory ordering to the LSE suffix
175+
#[rustfmt::skip]
176+
macro_rules! lse_mem_sfx {
177+
(Relaxed) => { "" };
178+
(Acquire) => { "a" };
179+
(Release) => { "l" };
180+
(AcqRel) => { "al" };
181+
}
182+
183+
// Generate the aarch64 LSE operation for memory ordering and width
184+
macro_rules! lse {
185+
($op:literal, $order:ident, 16) => {
186+
concat!($op, "p", lse_mem_sfx!($order))
187+
};
188+
($op:literal, $order:ident, $bytes:tt) => {
189+
concat!($op, lse_mem_sfx!($order), size!($bytes))
190+
};
191+
}
192+
129193
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130194
macro_rules! compare_and_swap {
131195
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,6 +201,11 @@ macro_rules! compare_and_swap {
137201
) -> int_ty!($bytes) {
138202
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139203
core::arch::naked_asm! {
204+
jmp_if_no_lse!("8f"),
205+
// CAS s(0), s(1), [x2]
206+
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
207+
"ret",
208+
"8:",
140209
// UXT s(tmp0), s(0)
141210
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
142211
"0:",
@@ -150,6 +219,7 @@ macro_rules! compare_and_swap {
150219
"cbnz w17, 0b",
151220
"1:",
152221
"ret",
222+
sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
153223
}
154224
}
155225
}
@@ -166,6 +236,11 @@ macro_rules! compare_and_swap_i128 {
166236
expected: i128, desired: i128, ptr: *mut i128
167237
) -> i128 {
168238
core::arch::naked_asm! {
239+
jmp_if_no_lse!("8f"),
240+
// CASP x0, x1, x2, x3, [x4]
241+
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
242+
"ret",
243+
"8:",
169244
"mov x16, x0",
170245
"mov x17, x1",
171246
"0:",
@@ -179,6 +254,7 @@ macro_rules! compare_and_swap_i128 {
179254
"cbnz w15, 0b",
180255
"1:",
181256
"ret",
257+
sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
182258
}
183259
}
184260
}
@@ -195,6 +271,11 @@ macro_rules! swap {
195271
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
196272
) -> int_ty!($bytes) {
197273
core::arch::naked_asm! {
274+
jmp_if_no_lse!("8f"),
275+
// SWP s(0), s(0), [x1]
276+
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
277+
"ret",
278+
"8:",
198279
// mov s(tmp0), s(0)
199280
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
200281
"0:",
@@ -204,6 +285,7 @@ macro_rules! swap {
204285
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
205286
"cbnz w17, 0b",
206287
"ret",
288+
sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
207289
}
208290
}
209291
}
@@ -212,14 +294,19 @@ macro_rules! swap {
212294

213295
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214296
macro_rules! fetch_op {
215-
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
297+
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
216298
intrinsics! {
217299
#[maybe_use_optimized_c_shim]
218300
#[unsafe(naked)]
219301
pub unsafe extern "C" fn $name (
220302
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
221303
) -> int_ty!($bytes) {
222304
core::arch::naked_asm! {
305+
jmp_if_no_lse!("8f"),
306+
// LSEOP s(0), s(0), [x1]
307+
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
308+
"ret",
309+
"8:",
223310
// mov s(tmp0), s(0)
224311
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
225312
"0:",
@@ -231,6 +318,7 @@ macro_rules! fetch_op {
231318
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
232319
"cbnz w15, 0b",
233320
"ret",
321+
sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
234322
}
235323
}
236324
}
@@ -240,25 +328,25 @@ macro_rules! fetch_op {
240328
// We need a single macro to pass to `foreach_ldadd`.
241329
macro_rules! add {
242330
($ordering:ident, $bytes:tt, $name:ident) => {
243-
fetch_op! { $ordering, $bytes, $name, "add" }
331+
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
244332
};
245333
}
246334

247335
macro_rules! and {
248336
($ordering:ident, $bytes:tt, $name:ident) => {
249-
fetch_op! { $ordering, $bytes, $name, "bic" }
337+
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
250338
};
251339
}
252340

253341
macro_rules! xor {
254342
($ordering:ident, $bytes:tt, $name:ident) => {
255-
fetch_op! { $ordering, $bytes, $name, "eor" }
343+
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
256344
};
257345
}
258346

259347
macro_rules! or {
260348
($ordering:ident, $bytes:tt, $name:ident) => {
261-
fetch_op! { $ordering, $bytes, $name, "orr" }
349+
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
262350
};
263351
}
264352

0 commit comments

Comments (0)