
Commit 3da32fb

compiler-builtins: plumb LSE support for aarch64 on linux
Add dynamic support for aarch64 LSE atomic ops on Linux targets when optimized-compiler-builtins is not enabled. A hook, __rust_enable_lse, lets the runtime switch the LSE paths on; a future patch will call it when the CPU reports LSE support. The resulting asm should exactly match that of LLVM's compiler-rt builtins, though the names of the support function and global differ.
1 parent e1b9081 commit 3da32fb
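
A sketch of how a runtime might use the hook once detection is wired up (hypothetical caller, not part of this commit; the helper name and the inline HWCAP constants are assumptions taken from the Linux aarch64 ABI, and getauxval comes from the libc crate):

unsafe extern "C" {
    fn __rust_enable_lse();
}

/// Hypothetical startup helper: flip the flag if the CPU reports FEAT_LSE.
fn enable_lse_if_supported() {
    const AT_HWCAP: libc::c_ulong = 16; // ELF auxiliary-vector tag on Linux
    const HWCAP_ATOMICS: libc::c_ulong = 1 << 8; // aarch64 LSE bit in AT_HWCAP
    let hwcap = unsafe { libc::getauxval(AT_HWCAP) };
    if hwcap & HWCAP_ATOMICS != 0 {
        unsafe { __rust_enable_lse() };
    }
}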

File tree

1 file changed: +74 -9 lines changed


library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs

Lines changed: 74 additions & 9 deletions
@@ -6,9 +6,6 @@
 //! which is supported on the current CPU.
 //! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
 //!
-//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
-//! Use the `compiler-rt` intrinsics if you want LSE support.
-//!
 //! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
 //!
 //! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
 //! We do something similar, but with macro arguments.
 #![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
 
-// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
+use core::sync::atomic::{AtomicU8, Ordering};
+
+/// non-zero if the host supports LSE atomics.
+static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
+
+intrinsics! {
+    /// Call to enable LSE in outline atomic operations. The caller must verify
+    /// LSE operations are supported.
+    pub extern "C" fn __rust_enable_lse() {
+        HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
+    }
+}
 
 /// Translate a byte size to a Rust type.
 #[rustfmt::skip]
@@ -126,6 +134,39 @@ macro_rules! stxp {
     };
 }
 
+// Check if LSE intrinsic can be used, and jump to label if not.
+macro_rules! jmp_if_no_lse {
+    ($label:literal) => {
+        concat!(
+            ".arch_extension lse; ",
+            "adrp x16, {have_lse}; ",
+            "ldrb w16, [x16, :lo12:{have_lse}]; ",
+            "cbz w16, ",
+            $label,
+            ";"
+        )
+    };
+}
+
+// Translate memory ordering to the LSE suffix
+#[rustfmt::skip]
+macro_rules! lse_mem_sfx {
+    (Relaxed) => { "" };
+    (Acquire) => { "a" };
+    (Release) => { "l" };
+    (AcqRel) => { "al" };
+}
+
+// Generate the aarch64 LSE operation for memory ordering and width
+macro_rules! lse {
+    ($op:literal, $order:ident, 16) => {
+        concat!($op, "p", lse_mem_sfx!($order))
+    };
+    ($op:literal, $order:ident, $bytes:tt) => {
+        concat!($op, lse_mem_sfx!($order), size!($bytes))
+    };
+}
+
 /// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
 macro_rules! compare_and_swap {
     ($ordering:ident, $bytes:tt, $name:ident) => {
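
For orientation, a standalone Rust sketch of the mnemonic scheme that `lse_mem_sfx!` and `lse!` implement (illustrative only; the `size!` mapping assumed below, 1 -> "b", 2 -> "h", 4 and 8 -> no suffix, follows compiler-rt's lse.S and is not shown in this hunk):

// Not crate code: mirrors the macro logic as a plain function for readability.
fn lse_mnemonic(op: &str, order: &str, bytes: usize) -> String {
    let order_sfx = match order {
        "Relaxed" => "",
        "Acquire" => "a",
        "Release" => "l",
        "AcqRel" => "al",
        _ => unreachable!("unknown ordering"),
    };
    if bytes == 16 {
        // 128-bit operations use the pair form, e.g. CASP/CASPA/CASPL/CASPAL.
        return format!("{op}p{order_sfx}");
    }
    let size_sfx = match bytes {
        1 => "b",
        2 => "h",
        4 | 8 => "", // width is conveyed by the W/X register operands instead
        _ => unreachable!("unsupported width"),
    };
    format!("{op}{order_sfx}{size_sfx}")
}

fn main() {
    assert_eq!(lse_mnemonic("cas", "AcqRel", 1), "casalb");
    assert_eq!(lse_mnemonic("swp", "Acquire", 8), "swpa");
    assert_eq!(lse_mnemonic("cas", "Release", 16), "caspl");
}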
@@ -137,6 +178,11 @@ macro_rules! compare_and_swap {
             ) -> int_ty!($bytes) {
                 // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                 core::arch::naked_asm! {
+                    jmp_if_no_lse!("8f"),
+                    // CAS s(0), s(1), [x2]
+                    concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
+                    "ret",
+                    "8:",
                     // UXT s(tmp0), s(0)
                     concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
                     "cbnz w17, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
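
Putting the macros together, one instantiation such as the 4-byte acquire CAS (__aarch64_cas4_acq) should expand to roughly the following. This is a hand-expanded sketch, not compiler output from this commit: the uxt!/ldxr!/reg! expansions and the slow-path lines that fall between the two hunks above are reconstructed from compiler-rt's lse.S, and size!(4) is assumed to produce no suffix.

use core::sync::atomic::AtomicU8;

static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);

#[unsafe(naked)]
pub unsafe extern "C" fn __aarch64_cas4_acq(expected: i32, desired: i32, ptr: *mut i32) -> i32 {
    core::arch::naked_asm! {
        // jmp_if_no_lse!("8f") expands to one string holding four asm statements:
        ".arch_extension lse; adrp x16, {have_lse}; ldrb w16, [x16, :lo12:{have_lse}]; cbz w16, 8f;",
        "casa w0, w1, [x2]", // LSE fast path: compare-and-swap with acquire semantics
        "ret",
        "8:", // LL/SC slow path, unchanged from before this commit
        "mov w16, w0", // uxt!(4) degenerates to a plain mov at this width
        "0:",
        "ldaxr w0, [x2]",
        "cmp w0, w16",
        "bne 1f",
        "stxr w17, w1, [x2]",
        "cbnz w17, 0b",
        "1:",
        "ret",
        have_lse = sym HAVE_LSE_ATOMICS,
    }
}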
@@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 {
                 expected: i128, desired: i128, ptr: *mut i128
             ) -> i128 {
                 core::arch::naked_asm! {
+                    jmp_if_no_lse!("8f"),
+                    // CASP x0, x1, x2, x3, [x4]
+                    concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
+                    "ret",
+                    "8:",
                     "mov x16, x0",
                     "mov x17, x1",
                     "0:",
@@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 {
                     "cbnz w15, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -195,6 +248,11 @@ macro_rules! swap {
                 left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    jmp_if_no_lse!("8f"),
+                    // SWP s(0), s(0), [x1]
+                    concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
+                    "ret",
+                    "8:",
                     // mov s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -204,6 +262,7 @@ macro_rules! swap {
                     concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                     "cbnz w17, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -212,14 +271,19 @@ macro_rules! swap {
 
 /// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
 macro_rules! fetch_op {
-    ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
+    ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
         intrinsics! {
             #[maybe_use_optimized_c_shim]
             #[unsafe(naked)]
             pub unsafe extern "C" fn $name (
                 val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    jmp_if_no_lse!("8f"),
+                    // LSEOP s(0), s(0), [x1]
+                    concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
+                    "ret",
+                    "8:",
                     // mov s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -231,6 +295,7 @@ macro_rules! fetch_op {
                     concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                     "cbnz w15, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
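
As a semantic reference (not crate code): each generated fetch_op symbol applies the operation and returns the previous value, just like the corresponding core atomic method. For example, a symbol such as __aarch64_ldadd4_relax(val, ptr) behaves like this sketch:

use core::sync::atomic::{AtomicI32, Ordering};

// Illustrative equivalence only; the real symbol is the naked asm generated above.
fn ldadd4_relax_equivalent(val: i32, ptr: &AtomicI32) -> i32 {
    // LDADD stores old + val and returns old, i.e. a relaxed fetch_add.
    ptr.fetch_add(val, Ordering::Relaxed)
}

fn main() {
    let x = AtomicI32::new(40);
    assert_eq!(ldadd4_relax_equivalent(2, &x), 40);
    assert_eq!(x.load(Ordering::Relaxed), 42);
}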
@@ -240,25 +305,25 @@ macro_rules! fetch_op {
 // We need a single macro to pass to `foreach_ldadd`.
 macro_rules! add {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "add" }
+        fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
     };
 }
 
 macro_rules! and {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "bic" }
+        fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
     };
 }
 
 macro_rules! xor {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "eor" }
+        fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
     };
 }
 
 macro_rules! or {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "orr" }
+        fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
     };
 }

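One non-obvious pairing above is "bic" with "ldclr": both compute old AND NOT operand, so the __aarch64_ldclr* helpers expect callers (for example, a compiler lowering fetch_and via outline atomics) to pass the complemented mask. A hedged sketch of that contract, using a hypothetical helper name:

use core::sync::atomic::{AtomicU32, Ordering};

// Semantics of an ldclr-family symbol such as __aarch64_ldclr4_relax:
// store old & !val and return old (the same function "bic" computes on the LL/SC path).
fn ldclr4_relax_equivalent(val: u32, ptr: &AtomicU32) -> u32 {
    ptr.fetch_and(!val, Ordering::Relaxed)
}

fn main() {
    let flags = AtomicU32::new(0b1111);
    // To get fetch_and(mask), a caller passes !mask to the ldclr helper.
    let old = ldclr4_relax_equivalent(!0b1010, &flags);
    assert_eq!(old, 0b1111);
    assert_eq!(flags.load(Ordering::Relaxed), 0b1010);
}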