Use simd_funnel_sh{l,r} and simd_round_ties_even to remove uses of LLVM intrinsics #1848

Merged · 1 commit · Jul 8, 2025
54 changes: 6 additions & 48 deletions crates/core_arch/src/aarch64/neon/generated.rs
@@ -23785,14 +23785,7 @@ pub fn vrndph_f16(a: f16) -> f16 {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v4f16"
)]
fn _vrndx_f16(a: float16x4_t) -> float16x4_t;
}
unsafe { _vrndx_f16(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral exact, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
@@ -23801,14 +23794,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v8f16"
)]
fn _vrndxq_f16(a: float16x8_t) -> float16x8_t;
}
unsafe { _vrndxq_f16(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral exact, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
@@ -23817,14 +23803,7 @@ pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v2f32"
)]
fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
}
unsafe { _vrndx_f32(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral exact, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
@@ -23833,14 +23812,7 @@ pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v4f32"
)]
fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
}
unsafe { _vrndxq_f32(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral exact, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
@@ -23849,14 +23821,7 @@ pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v1f64"
)]
fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
}
unsafe { _vrndx_f64(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral exact, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
@@ -23865,14 +23830,7 @@ pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(frintx))]
pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.rint.v2f64"
)]
fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
}
unsafe { _vrndxq_f64(a) }
unsafe { simd_round_ties_even(a) }
}
#[doc = "Floating-point round to integral, using current rounding mode"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"]
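The aarch64 hunks above replace the per-type `llvm.rint.*` declarations with the generic `simd_round_ties_even` intrinsic, keeping the `assert_instr(frintx)` checks unchanged. As a rough scalar illustration (not code from this PR, and assuming the default round-to-nearest mode), the sketch below uses `f32::round_ties_even` to show the per-lane rounding behaviour this maps to: nearest integer, with halfway cases going to the even integer.

```rust
// Minimal scalar sketch of ties-to-even rounding; purely illustrative,
// `f32::round_ties_even` (stable since Rust 1.77) stands in for the
// lane-wise behaviour of the vector intrinsic.
fn main() {
    let inputs = [0.5f32, 1.5, 2.5, -2.5, 2.4, 2.6];
    for x in inputs {
        println!("{x:>5} -> {}", x.round_ties_even());
    }
    // 0.5 -> 0, 1.5 -> 2, 2.5 -> 2, -2.5 -> -2: halfway cases resolve to the
    // even integer, unlike `f32::round`, which rounds them away from zero.
}
```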
29 changes: 5 additions & 24 deletions crates/core_arch/src/powerpc/altivec.rs
@@ -360,25 +360,6 @@ unsafe extern "C" {
#[link_name = "llvm.ppc.altivec.vsrv"]
fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;

#[link_name = "llvm.fshl.v16i8"]
fn fshlb(
a: vector_unsigned_char,
b: vector_unsigned_char,
c: vector_unsigned_char,
) -> vector_unsigned_char;
#[link_name = "llvm.fshl.v8i16"]
fn fshlh(
a: vector_unsigned_short,
b: vector_unsigned_short,
c: vector_unsigned_short,
) -> vector_unsigned_short;
#[link_name = "llvm.fshl.v4i32"]
fn fshlw(
a: vector_unsigned_int,
b: vector_unsigned_int,
c: vector_unsigned_int,
) -> vector_unsigned_int;

#[link_name = "llvm.nearbyint.v4f32"]
fn vrfin(a: vector_float) -> vector_float;
}
@@ -3193,19 +3174,19 @@ mod sealed {
impl_vec_cntlz! { vec_vcntlzw(vector_unsigned_int) }

macro_rules! impl_vrl {
($fun:ident $intr:ident $ty:ident) => {
($fun:ident $ty:ident) => {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr($fun))]
unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
transmute($intr(transmute(a), transmute(a), transmute(b)))
simd_funnel_shl(a, a, b)
}
};
}

impl_vrl! { vrlb fshlb u8 }
impl_vrl! { vrlh fshlh u16 }
impl_vrl! { vrlw fshlw u32 }
impl_vrl! { vrlb u8 }
impl_vrl! { vrlh u16 }
impl_vrl! { vrlw u32 }

#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub trait VectorRl {
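In the Altivec hunks, the `vrl*` rotate helpers now call `simd_funnel_shl(a, a, b)` directly instead of routing through the `llvm.fshl.*` declarations. A funnel shift left concatenates the first operand (high half) with the second (low half), shifts left, and keeps the high half, so passing the same value for both halves yields a rotate. The plain-Rust sketch below (illustration only, with the modulo-width shift behaviour of LLVM's `fshl` assumed) checks that identity for `u8`.

```rust
// Scalar model of a funnel shift left on u8. The shift amount is reduced
// modulo the element width, matching LLVM's fshl semantics.
fn funnel_shl_u8(hi: u8, lo: u8, shift: u32) -> u8 {
    let s = shift % u8::BITS;
    if s == 0 {
        hi
    } else {
        (hi << s) | (lo >> (u8::BITS - s))
    }
}

fn main() {
    let x: u8 = 0b1011_0010;
    for s in 0..16 {
        // With both operands equal, the bits shifted out on the left come
        // back in on the right: exactly `rotate_left`.
        assert_eq!(funnel_shl_u8(x, x, s), x.rotate_left(s));
    }
    println!("funnel_shl(x, x, s) == x.rotate_left(s)");
}
```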
41 changes: 16 additions & 25 deletions crates/core_arch/src/s390x/vector.rs
@@ -83,9 +83,6 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float;
#[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double;

#[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float;
#[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> vector_double;

#[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float;
#[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double;

@@ -101,11 +98,6 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
#[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;

#[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
#[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
#[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
#[link_name = "llvm.fshl.v2i64"] fn fshlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: vector_unsigned_long_long) -> vector_unsigned_long_long;

#[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char;
#[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short;
#[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int;
@@ -1197,8 +1189,8 @@ mod sealed {
test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }

test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] }
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] }
test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }

#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorRoundc {
@@ -1221,8 +1213,8 @@ mod sealed {
impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) }
impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) }

impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) }
impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) }
impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_float) }
impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_double) }

#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorTrunc {
@@ -1411,43 +1403,42 @@ mod sealed {
}

macro_rules! impl_rot {
($fun:ident $intr:ident $ty:ident) => {
($fun:ident $ty:ident) => {
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr($fun))]
unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
transmute($intr(transmute(a), transmute(a), transmute(b)))
simd_funnel_shl(a, a, b)
}
};
}

impl_rot! { verllvb fshlb u8 }
impl_rot! { verllvh fshlh u16 }
impl_rot! { verllvf fshlf u32 }
impl_rot! { verllvg fshlg u64 }
impl_rot! { verllvb u8 }
impl_rot! { verllvh u16 }
impl_rot! { verllvf u32 }
impl_rot! { verllvg u64 }

impl_vec_shift! { [VectorRl vec_rl] (verllvb, verllvh, verllvf, verllvg) }

macro_rules! test_rot_imm {
($fun:ident $instr:ident $intr:ident $ty:ident) => {
($fun:ident $instr:ident $ty:ident) => {
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr($instr))]
unsafe fn $fun(a: t_t_l!($ty), bits: core::ffi::c_ulong) -> t_t_l!($ty) {
// mod by the number of bits in a's element type to prevent UB
let bits = (bits % $ty::BITS as core::ffi::c_ulong) as $ty;
let a = transmute(a);
let b = <t_t_s!($ty)>::splat(bits);

transmute($intr(a, a, transmute(b)))
simd_funnel_shl(a, a, transmute(b))
}
};
}

test_rot_imm! { verllvb_imm verllb fshlb u8 }
test_rot_imm! { verllvh_imm verllh fshlh u16 }
test_rot_imm! { verllvf_imm verllf fshlf u32 }
test_rot_imm! { verllvg_imm verllg fshlg u64 }
test_rot_imm! { verllvb_imm verllb u8 }
test_rot_imm! { verllvh_imm verllh u16 }
test_rot_imm! { verllvf_imm verllf u32 }
test_rot_imm! { verllvg_imm verllg u64 }

#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorRli {
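The s390x `test_rot_imm!` helpers keep their existing reduction of the scalar count modulo the element width and the splat across lanes; only the final call changes to `simd_funnel_shl`. The sketch below is a loose stand-in (plain arrays and invented names rather than the s390x vector types) for that splat-then-rotate-each-lane shape.

```rust
// Illustrative stand-in for test_rot_imm: reduce the scalar count modulo the
// lane width, then rotate every lane by that amount. The array type and the
// function name are assumptions for this sketch, not crate APIs.
fn rotate_all_lanes_u8(a: [u8; 16], bits: u64) -> [u8; 16] {
    let s = (bits % u8::BITS as u64) as u32; // keep the per-lane shift in range
    let mut out = a;
    for lane in &mut out {
        *lane = lane.rotate_left(s); // per lane: a funnel shift with equal halves
    }
    out
}

fn main() {
    let v = [0b1000_0001u8; 16];
    // 9 % 8 == 1, so every lane rotates left by one bit.
    assert_eq!(rotate_all_lanes_u8(v, 9), [0b0000_0011u8; 16]);
    println!("ok");
}
```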