diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 96ed82021b..692b8ec38f 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -23785,14 +23785,7 @@ pub fn vrndph_f16(a: f16) -> f16 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v4f16"
-        )]
-        fn _vrndx_f16(a: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrndx_f16(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
@@ -23801,14 +23794,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v8f16"
-        )]
-        fn _vrndxq_f16(a: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrndxq_f16(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
@@ -23817,14 +23803,7 @@ pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f32"
-        )]
-        fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrndx_f32(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
@@ -23833,14 +23812,7 @@ pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v4f32"
-        )]
-        fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrndxq_f32(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
@@ -23849,14 +23821,7 @@ pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v1f64"
-        )]
-        fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
-    }
-    unsafe { _vrndx_f64(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
@@ -23865,14 +23830,7 @@ pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f64"
-        )]
-        fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    unsafe { _vrndxq_f64(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"]
diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs
index 2deeb53c20..a7bbf35ed8 100644
--- a/crates/core_arch/src/powerpc/altivec.rs
+++ b/crates/core_arch/src/powerpc/altivec.rs
@@ -360,25 +360,6 @@ unsafe extern "C" {
     #[link_name = "llvm.ppc.altivec.vsrv"]
     fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
 
-    #[link_name = "llvm.fshl.v16i8"]
-    fn fshlb(
-        a: vector_unsigned_char,
-        b: vector_unsigned_char,
-        c: vector_unsigned_char,
-    ) -> vector_unsigned_char;
-    #[link_name = "llvm.fshl.v8i16"]
-    fn fshlh(
-        a: vector_unsigned_short,
-        b: vector_unsigned_short,
-        c: vector_unsigned_short,
-    ) -> vector_unsigned_short;
-    #[link_name = "llvm.fshl.v4i32"]
-    fn fshlw(
-        a: vector_unsigned_int,
-        b: vector_unsigned_int,
-        c: vector_unsigned_int,
-    ) -> vector_unsigned_int;
-
     #[link_name = "llvm.nearbyint.v4f32"]
     fn vrfin(a: vector_float) -> vector_float;
 }
@@ -3193,19 +3174,19 @@ mod sealed {
     impl_vec_cntlz! { vec_vcntlzw(vector_unsigned_int) }
 
     macro_rules! impl_vrl {
-        ($fun:ident $intr:ident $ty:ident) => {
+        ($fun:ident $ty:ident) => {
             #[inline]
             #[target_feature(enable = "altivec")]
             #[cfg_attr(test, assert_instr($fun))]
             unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
-                transmute($intr(transmute(a), transmute(a), transmute(b)))
+                simd_funnel_shl(a, a, b)
             }
         };
     }
 
-    impl_vrl! { vrlb fshlb u8 }
-    impl_vrl! { vrlh fshlh u16 }
-    impl_vrl! { vrlw fshlw u32 }
+    impl_vrl! { vrlb u8 }
+    impl_vrl! { vrlh u16 }
+    impl_vrl! { vrlw u32 }
 
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     pub trait VectorRl {
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index ae5c37ce01..c46c09303d 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -83,9 +83,6 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float;
     #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double;
 
-    #[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float;
-    #[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> vector_double;
-
     #[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float;
     #[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double;
 
@@ -101,11 +98,6 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
     #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;
 
-    #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
-    #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
-    #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
-    #[link_name = "llvm.fshl.v2i64"] fn fshlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
-
     #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char;
     #[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short;
     #[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int;
@@ -1197,8 +1189,8 @@ mod sealed {
     test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
     test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
 
-    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] }
-    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] }
+    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
+    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorRoundc {
@@ -1221,8 +1213,8 @@ mod sealed {
     impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) }
     impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) }
 
-    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) }
-    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) }
+    impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_float) }
+    impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_double) }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorTrunc {
@@ -1411,43 +1403,42 @@ mod sealed {
     }
 
     macro_rules! impl_rot {
-        ($fun:ident $intr:ident $ty:ident) => {
+        ($fun:ident $ty:ident) => {
             #[inline]
             #[target_feature(enable = "vector")]
             #[cfg_attr(test, assert_instr($fun))]
             unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
-                transmute($intr(transmute(a), transmute(a), transmute(b)))
+                simd_funnel_shl(a, a, b)
             }
         };
     }
 
-    impl_rot! { verllvb fshlb u8 }
-    impl_rot! { verllvh fshlh u16 }
-    impl_rot! { verllvf fshlf u32 }
-    impl_rot! { verllvg fshlg u64 }
+    impl_rot! { verllvb u8 }
+    impl_rot! { verllvh u16 }
+    impl_rot! { verllvf u32 }
+    impl_rot! { verllvg u64 }
 
     impl_vec_shift! { [VectorRl vec_rl] (verllvb, verllvh, verllvf, verllvg) }
 
     macro_rules! test_rot_imm {
-        ($fun:ident $instr:ident $intr:ident $ty:ident) => {
+        ($fun:ident $instr:ident $ty:ident) => {
             #[inline]
             #[target_feature(enable = "vector")]
             #[cfg_attr(test, assert_instr($instr))]
             unsafe fn $fun(a: t_t_l!($ty), bits: core::ffi::c_ulong) -> t_t_l!($ty) {
                 // mod by the number of bits in a's element type to prevent UB
                 let bits = (bits % $ty::BITS as core::ffi::c_ulong) as $ty;
-                let a = transmute(a);
                 let b = <t_t_s!($ty)>::splat(bits);
 
-                transmute($intr(a, a, transmute(b)))
+                simd_funnel_shl(a, a, transmute(b))
             }
         };
     }
 
-    test_rot_imm! { verllvb_imm verllb fshlb u8 }
-    test_rot_imm! { verllvh_imm verllh fshlh u16 }
-    test_rot_imm! { verllvf_imm verllf fshlf u32 }
-    test_rot_imm! { verllvg_imm verllg fshlg u64 }
+    test_rot_imm! { verllvb_imm verllb u8 }
+    test_rot_imm! { verllvh_imm verllh u16 }
+    test_rot_imm! { verllvf_imm verllf u32 }
+    test_rot_imm! { verllvg_imm verllg u64 }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorRli {
diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs
index c722f7b370..09a90e29bf 100644
--- a/crates/core_arch/src/x86/avx512vbmi2.rs
+++ b/crates/core_arch/src/x86/avx512vbmi2.rs
@@ -500,7 +500,7 @@ pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
 }
 
 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -539,7 +539,7 @@ pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
 }
 
 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -578,7 +578,7 @@ pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
 }
 
 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -617,7 +617,7 @@ pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
 }
 
 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -656,7 +656,7 @@ pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
 }
 
 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -695,7 +695,7 @@ pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
 }
 
 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -734,7 +734,7 @@ pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
 }
 
 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -773,7 +773,7 @@ pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
 }
 
 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -812,7 +812,7 @@ pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
 }
 
 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -851,7 +851,7 @@ pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
 }
 
 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -890,7 +890,7 @@ pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
 }
 
 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -929,7 +929,7 @@ pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
 }
 
 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -968,7 +968,7 @@ pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
 }
 
 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -1007,7 +1007,7 @@ pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
 }
 
 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -1046,7 +1046,7 @@ pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
 }
 
 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -1085,7 +1085,7 @@ pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
 }
 
 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -1124,7 +1124,7 @@ pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
 }
 
 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -1163,7 +1163,7 @@ pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
 }
 
 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -2138,44 +2138,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
     fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
 
-    #[link_name = "llvm.fshl.v8i64"]
-    fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
-    #[link_name = "llvm.fshl.v4i64"]
-    fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
-    #[link_name = "llvm.fshl.v2i64"]
-    fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
-    #[link_name = "llvm.fshl.v16i32"]
-    fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
-    #[link_name = "llvm.fshl.v8i32"]
-    fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
-    #[link_name = "llvm.fshl.v4i32"]
-    fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
-    #[link_name = "llvm.fshl.v32i16"]
-    fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
-    #[link_name = "llvm.fshl.v16i16"]
-    fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
-    #[link_name = "llvm.fshl.v8i16"]
-    fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
-
-    #[link_name = "llvm.fshr.v8i64"]
-    fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
-    #[link_name = "llvm.fshr.v4i64"]
-    fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
-    #[link_name = "llvm.fshr.v2i64"]
-    fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
-    #[link_name = "llvm.fshr.v16i32"]
-    fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
-    #[link_name = "llvm.fshr.v8i32"]
-    fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
-    #[link_name = "llvm.fshr.v4i32"]
-    fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
-    #[link_name = "llvm.fshr.v32i16"]
-    fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
-    #[link_name = "llvm.fshr.v16i16"]
-    fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
-    #[link_name = "llvm.fshr.v8i16"]
-    fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
     fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
     #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index f658267b9a..97a1f48b63 100644
--- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2976,11 +2976,7 @@ intrinsics:
       - float64x1_t
       - float64x2_t
     compose:
-      - LLVMLink:
-          name: "llvm.rint.{neon_type}"
-          links:
-            - link: "llvm.rint.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_round_ties_even, [a]]
 
 
   - name: "vrndx{neon_type.no}"
@@ -2996,11 +2992,7 @@
       - float16x4_t
      - float16x8_t
     compose:
-      - LLVMLink:
-          name: "llvm.rint.{neon_type}"
-          links:
-            - link: "llvm.rint.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_round_ties_even, [a]]
 
 
   - name: "vrndx{type[1]}{type[0]}"