From 92b63abdd3a584eac472adf4e4ee0070f3f3aad3 Mon Sep 17 00:00:00 2001
From: Aurelia Molzer <5550310+197g@users.noreply.github.com>
Date: Thu, 24 Jul 2025 14:41:07 +0200
Subject: [PATCH] Add non-temporal note for maskmoveu_si128

Like other non-temporal instructions, this one has additional safety
requirements due to the mismatch with the Rust memory model. It is
vital to know about these requirements when using this instruction.
---
 crates/core_arch/src/x86/sse2.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index 3dabcde18c..1eaa89663b 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -1272,7 +1272,7 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
 }
 
 /// Conditionally store 8-bit integer elements from `a` into memory using
-/// `mask`.
+/// `mask` flagged as non-temporal (unlikely to be used again soon).
 ///
 /// Elements are not stored when the highest bit is not set in the
 /// corresponding element.
@@ -1281,6 +1281,15 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
 /// to be aligned on any particular boundary.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
+///
+/// # Safety of non-temporal stores
+///
+/// After using this intrinsic, but before any other access to the memory that this intrinsic
+/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
+/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
+/// return.
+///
+/// See [`_mm_sfence`] for details.
 #[inline]
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(maskmovdqu))]
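
Note (not part of the patch): a minimal sketch of the documented protocol, assuming an
x86_64 target. The function name `masked_store_bytes`, the mask pattern, and the fill
byte are illustrative only; the point is that the non-temporal masked store is paired
with `_mm_sfence` before any other access to the memory and before returning.

// Illustrative sketch, not part of this patch: names and values are hypothetical.
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{_mm_maskmoveu_si128, _mm_set1_epi8, _mm_setr_epi8, _mm_sfence};

/// Stores a constant byte into every other element of `dst[0..16]`.
///
/// # Safety
/// `dst` must be valid for writes of 16 bytes.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn masked_store_bytes(dst: *mut i8) {
    let data = _mm_set1_epi8(0x42);
    // Only elements whose mask byte has the highest bit set are stored.
    let mask = _mm_setr_epi8(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0);
    _mm_maskmoveu_si128(data, mask, dst);
    // Per the documentation added above: fence before any other access to the
    // stored memory, and in particular before returning from this function.
    _mm_sfence();
}

#[cfg(target_arch = "x86_64")]
fn main() {
    let mut buf = [0i8; 16];
    // SAFETY: the buffer is 16 bytes long and SSE2 is baseline on x86_64.
    unsafe { masked_store_bytes(buf.as_mut_ptr()) };
    assert_eq!(buf[0], 0x42); // stored (mask high bit set)
    assert_eq!(buf[1], 0); // left untouched (mask high bit clear)
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}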