Skip to content

Commit 1ceacf5

Browse files
committed
LoongArch64 LSX fast-path for str.contains(&str)
Benchmark results with LLVM 21 on LA664:
```
OLD: test bench_is_contained_in ... bench: 43.63 ns/iter (+/- 0.04)
NEW: test bench_is_contained_in ... bench: 12.81 ns/iter (+/- 0.01)
```
1 parent ace6330 commit 1ceacf5

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

library/core/src/str/pattern.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -996,7 +996,10 @@ impl<'b> Pattern for &'b str {
996996
return haystack.as_bytes().contains(&self.as_bytes()[0]);
997997
}
998998

999-
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
999+
#[cfg(any(
1000+
all(target_arch = "x86_64", target_feature = "sse2"),
1001+
all(target_arch = "loongarch64", target_feature = "lsx")
1002+
))]
10001003
if self.len() <= 32 {
10011004
if let Some(result) = simd_contains(self, haystack) {
10021005
return result;
@@ -1770,11 +1773,18 @@ impl TwoWayStrategy for RejectAndMatch {
17701773
/// If we ever ship std for x86-64-v3 or adapt this for other platforms then wider vectors
17711774
/// should be evaluated.
17721775
///
1776+
/// Similarly, on LoongArch the 128-bit LSX vector extension is the baseline,
1777+
/// so we also use `u8x16` there. Wider vector widths may be considered
1778+
/// for future LoongArch extensions (e.g., LASX).
1779+
///
17731780
/// For haystacks smaller than vector-size + needle length it falls back to
17741781
/// a naive O(n*m) search so this implementation should not be called on larger needles.
17751782
///
17761783
/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2
1777-
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
1784+
#[cfg(any(
1785+
all(target_arch = "x86_64", target_feature = "sse2"),
1786+
all(target_arch = "loongarch64", target_feature = "lsx")
1787+
))]
17781788
#[inline]
17791789
fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
17801790
let needle = needle.as_bytes();
@@ -1906,7 +1916,10 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
19061916
/// # Safety
19071917
///
19081918
/// Both slices must have the same length.
1909-
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86
1919+
#[cfg(any(
1920+
all(target_arch = "x86_64", target_feature = "sse2"),
1921+
all(target_arch = "loongarch64", target_feature = "lsx")
1922+
))]
19101923
#[inline]
19111924
unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
19121925
debug_assert_eq!(x.len(), y.len());

0 commit comments

Comments
 (0)