Skip to content

Commit dfa880a

Browse files
committed
aarch64: use simd_reduce_add_unordered
1 parent 9430936 commit dfa880a

File tree

4 files changed

+58
-217
lines changed

4 files changed

+58
-217
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 16 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -604,14 +604,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
604604
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
605605
#[cfg_attr(test, assert_instr(addp))]
606606
pub fn vaddv_s32(a: int32x2_t) -> i32 {
607-
unsafe extern "unadjusted" {
608-
#[cfg_attr(
609-
any(target_arch = "aarch64", target_arch = "arm64ec"),
610-
link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
611-
)]
612-
fn _vaddv_s32(a: int32x2_t) -> i32;
613-
}
614-
unsafe { _vaddv_s32(a) }
607+
unsafe { simd_reduce_add_unordered(a) }
615608
}
616609
#[doc = "Add across vector"]
617610
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -620,14 +613,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
620613
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
621614
#[cfg_attr(test, assert_instr(addv))]
622615
pub fn vaddv_s8(a: int8x8_t) -> i8 {
623-
unsafe extern "unadjusted" {
624-
#[cfg_attr(
625-
any(target_arch = "aarch64", target_arch = "arm64ec"),
626-
link_name = "llvm.aarch64.neon.saddv.i8.v8i8"
627-
)]
628-
fn _vaddv_s8(a: int8x8_t) -> i8;
629-
}
630-
unsafe { _vaddv_s8(a) }
616+
unsafe { simd_reduce_add_unordered(a) }
631617
}
632618
#[doc = "Add across vector"]
633619
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -636,14 +622,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
636622
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
637623
#[cfg_attr(test, assert_instr(addv))]
638624
pub fn vaddvq_s8(a: int8x16_t) -> i8 {
639-
unsafe extern "unadjusted" {
640-
#[cfg_attr(
641-
any(target_arch = "aarch64", target_arch = "arm64ec"),
642-
link_name = "llvm.aarch64.neon.saddv.i8.v16i8"
643-
)]
644-
fn _vaddvq_s8(a: int8x16_t) -> i8;
645-
}
646-
unsafe { _vaddvq_s8(a) }
625+
unsafe { simd_reduce_add_unordered(a) }
647626
}
648627
#[doc = "Add across vector"]
649628
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -652,14 +631,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
652631
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
653632
#[cfg_attr(test, assert_instr(addv))]
654633
pub fn vaddv_s16(a: int16x4_t) -> i16 {
655-
unsafe extern "unadjusted" {
656-
#[cfg_attr(
657-
any(target_arch = "aarch64", target_arch = "arm64ec"),
658-
link_name = "llvm.aarch64.neon.saddv.i16.v4i16"
659-
)]
660-
fn _vaddv_s16(a: int16x4_t) -> i16;
661-
}
662-
unsafe { _vaddv_s16(a) }
634+
unsafe { simd_reduce_add_unordered(a) }
663635
}
664636
#[doc = "Add across vector"]
665637
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -668,14 +640,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
668640
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
669641
#[cfg_attr(test, assert_instr(addv))]
670642
pub fn vaddvq_s16(a: int16x8_t) -> i16 {
671-
unsafe extern "unadjusted" {
672-
#[cfg_attr(
673-
any(target_arch = "aarch64", target_arch = "arm64ec"),
674-
link_name = "llvm.aarch64.neon.saddv.i16.v8i16"
675-
)]
676-
fn _vaddvq_s16(a: int16x8_t) -> i16;
677-
}
678-
unsafe { _vaddvq_s16(a) }
643+
unsafe { simd_reduce_add_unordered(a) }
679644
}
680645
#[doc = "Add across vector"]
681646
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -684,14 +649,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
684649
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
685650
#[cfg_attr(test, assert_instr(addv))]
686651
pub fn vaddvq_s32(a: int32x4_t) -> i32 {
687-
unsafe extern "unadjusted" {
688-
#[cfg_attr(
689-
any(target_arch = "aarch64", target_arch = "arm64ec"),
690-
link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
691-
)]
692-
fn _vaddvq_s32(a: int32x4_t) -> i32;
693-
}
694-
unsafe { _vaddvq_s32(a) }
652+
unsafe { simd_reduce_add_unordered(a) }
695653
}
696654
#[doc = "Add across vector"]
697655
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -700,14 +658,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
700658
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
701659
#[cfg_attr(test, assert_instr(addp))]
702660
pub fn vaddv_u32(a: uint32x2_t) -> u32 {
703-
unsafe extern "unadjusted" {
704-
#[cfg_attr(
705-
any(target_arch = "aarch64", target_arch = "arm64ec"),
706-
link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
707-
)]
708-
fn _vaddv_u32(a: uint32x2_t) -> u32;
709-
}
710-
unsafe { _vaddv_u32(a) }
661+
unsafe { simd_reduce_add_unordered(a) }
711662
}
712663
#[doc = "Add across vector"]
713664
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -716,14 +667,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
716667
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
717668
#[cfg_attr(test, assert_instr(addv))]
718669
pub fn vaddv_u8(a: uint8x8_t) -> u8 {
719-
unsafe extern "unadjusted" {
720-
#[cfg_attr(
721-
any(target_arch = "aarch64", target_arch = "arm64ec"),
722-
link_name = "llvm.aarch64.neon.uaddv.i8.v8i8"
723-
)]
724-
fn _vaddv_u8(a: uint8x8_t) -> u8;
725-
}
726-
unsafe { _vaddv_u8(a) }
670+
unsafe { simd_reduce_add_unordered(a) }
727671
}
728672
#[doc = "Add across vector"]
729673
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -732,14 +676,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
732676
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
733677
#[cfg_attr(test, assert_instr(addv))]
734678
pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
735-
unsafe extern "unadjusted" {
736-
#[cfg_attr(
737-
any(target_arch = "aarch64", target_arch = "arm64ec"),
738-
link_name = "llvm.aarch64.neon.uaddv.i8.v16i8"
739-
)]
740-
fn _vaddvq_u8(a: uint8x16_t) -> u8;
741-
}
742-
unsafe { _vaddvq_u8(a) }
679+
unsafe { simd_reduce_add_unordered(a) }
743680
}
744681
#[doc = "Add across vector"]
745682
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -748,14 +685,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
748685
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
749686
#[cfg_attr(test, assert_instr(addv))]
750687
pub fn vaddv_u16(a: uint16x4_t) -> u16 {
751-
unsafe extern "unadjusted" {
752-
#[cfg_attr(
753-
any(target_arch = "aarch64", target_arch = "arm64ec"),
754-
link_name = "llvm.aarch64.neon.uaddv.i16.v4i16"
755-
)]
756-
fn _vaddv_u16(a: uint16x4_t) -> u16;
757-
}
758-
unsafe { _vaddv_u16(a) }
688+
unsafe { simd_reduce_add_unordered(a) }
759689
}
760690
#[doc = "Add across vector"]
761691
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -764,14 +694,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
764694
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
765695
#[cfg_attr(test, assert_instr(addv))]
766696
pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
767-
unsafe extern "unadjusted" {
768-
#[cfg_attr(
769-
any(target_arch = "aarch64", target_arch = "arm64ec"),
770-
link_name = "llvm.aarch64.neon.uaddv.i16.v8i16"
771-
)]
772-
fn _vaddvq_u16(a: uint16x8_t) -> u16;
773-
}
774-
unsafe { _vaddvq_u16(a) }
697+
unsafe { simd_reduce_add_unordered(a) }
775698
}
776699
#[doc = "Add across vector"]
777700
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -780,14 +703,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
780703
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
781704
#[cfg_attr(test, assert_instr(addv))]
782705
pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
783-
unsafe extern "unadjusted" {
784-
#[cfg_attr(
785-
any(target_arch = "aarch64", target_arch = "arm64ec"),
786-
link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
787-
)]
788-
fn _vaddvq_u32(a: uint32x4_t) -> u32;
789-
}
790-
unsafe { _vaddvq_u32(a) }
706+
unsafe { simd_reduce_add_unordered(a) }
791707
}
792708
#[doc = "Add across vector"]
793709
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -796,14 +712,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
796712
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
797713
#[cfg_attr(test, assert_instr(addp))]
798714
pub fn vaddvq_s64(a: int64x2_t) -> i64 {
799-
unsafe extern "unadjusted" {
800-
#[cfg_attr(
801-
any(target_arch = "aarch64", target_arch = "arm64ec"),
802-
link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
803-
)]
804-
fn _vaddvq_s64(a: int64x2_t) -> i64;
805-
}
806-
unsafe { _vaddvq_s64(a) }
715+
unsafe { simd_reduce_add_unordered(a) }
807716
}
808717
#[doc = "Add across vector"]
809718
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -812,14 +721,7 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
812721
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
813722
#[cfg_attr(test, assert_instr(addp))]
814723
pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
815-
unsafe extern "unadjusted" {
816-
#[cfg_attr(
817-
any(target_arch = "aarch64", target_arch = "arm64ec"),
818-
link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
819-
)]
820-
fn _vaddvq_u64(a: uint64x2_t) -> u64;
821-
}
822-
unsafe { _vaddvq_u64(a) }
724+
unsafe { simd_reduce_add_unordered(a) }
823725
}
824726
#[doc = "Multi-vector floating-point absolute maximum"]
825727
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
@@ -15951,23 +15853,11 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
1595115853
#[doc = "Add pairwise"]
1595215854
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
1595315855
#[inline]
15954-
#[cfg(target_endian = "little")]
15955-
#[target_feature(enable = "neon")]
15956-
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15957-
#[cfg_attr(test, assert_instr(addp))]
15958-
pub fn vpaddd_s64(a: int64x2_t) -> i64 {
15959-
unsafe { transmute(vaddvq_u64(transmute(a))) }
15960-
}
15961-
#[doc = "Add pairwise"]
15962-
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
15963-
#[inline]
15964-
#[cfg(target_endian = "big")]
1596515856
#[target_feature(enable = "neon")]
1596615857
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1596715858
#[cfg_attr(test, assert_instr(addp))]
1596815859
pub fn vpaddd_s64(a: int64x2_t) -> i64 {
15969-
let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
15970-
unsafe { transmute(vaddvq_u64(transmute(a))) }
15860+
unsafe { simd_reduce_add_unordered(a) }
1597115861
}
1597215862
#[doc = "Add pairwise"]
1597315863
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15976,7 +15866,7 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
1597615866
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1597715867
#[cfg_attr(test, assert_instr(addp))]
1597815868
pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
15979-
vaddvq_u64(a)
15869+
unsafe { simd_reduce_add_unordered(a) }
1598015870
}
1598115871
#[doc = "Floating-point add pairwise"]
1598215872
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 24 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,15 +1483,11 @@ pub fn vabsq_f32(a: float32x4_t) -> float32x4_t {
14831483
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
14841484
)]
14851485
pub fn vabs_s8(a: int8x8_t) -> int8x8_t {
1486-
unsafe extern "unadjusted" {
1487-
#[cfg_attr(
1488-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1489-
link_name = "llvm.aarch64.neon.abs.v8i8"
1490-
)]
1491-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
1492-
fn _vabs_s8(a: int8x8_t) -> int8x8_t;
1486+
unsafe {
1487+
let neg: int8x8_t = simd_neg(a);
1488+
let mask: int8x8_t = simd_ge(a, neg);
1489+
simd_select(mask, a, neg)
14931490
}
1494-
unsafe { _vabs_s8(a) }
14951491
}
14961492
#[doc = "Absolute value (wrapping)."]
14971493
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s8)"]
@@ -1512,15 +1508,11 @@ pub fn vabs_s8(a: int8x8_t) -> int8x8_t {
15121508
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
15131509
)]
15141510
pub fn vabsq_s8(a: int8x16_t) -> int8x16_t {
1515-
unsafe extern "unadjusted" {
1516-
#[cfg_attr(
1517-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1518-
link_name = "llvm.aarch64.neon.abs.v16i8"
1519-
)]
1520-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
1521-
fn _vabsq_s8(a: int8x16_t) -> int8x16_t;
1511+
unsafe {
1512+
let neg: int8x16_t = simd_neg(a);
1513+
let mask: int8x16_t = simd_ge(a, neg);
1514+
simd_select(mask, a, neg)
15221515
}
1523-
unsafe { _vabsq_s8(a) }
15241516
}
15251517
#[doc = "Absolute value (wrapping)."]
15261518
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s16)"]
@@ -1541,15 +1533,11 @@ pub fn vabsq_s8(a: int8x16_t) -> int8x16_t {
15411533
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
15421534
)]
15431535
pub fn vabs_s16(a: int16x4_t) -> int16x4_t {
1544-
unsafe extern "unadjusted" {
1545-
#[cfg_attr(
1546-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1547-
link_name = "llvm.aarch64.neon.abs.v4i16"
1548-
)]
1549-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
1550-
fn _vabs_s16(a: int16x4_t) -> int16x4_t;
1536+
unsafe {
1537+
let neg: int16x4_t = simd_neg(a);
1538+
let mask: int16x4_t = simd_ge(a, neg);
1539+
simd_select(mask, a, neg)
15511540
}
1552-
unsafe { _vabs_s16(a) }
15531541
}
15541542
#[doc = "Absolute value (wrapping)."]
15551543
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s16)"]
@@ -1570,15 +1558,11 @@ pub fn vabs_s16(a: int16x4_t) -> int16x4_t {
15701558
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
15711559
)]
15721560
pub fn vabsq_s16(a: int16x8_t) -> int16x8_t {
1573-
unsafe extern "unadjusted" {
1574-
#[cfg_attr(
1575-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1576-
link_name = "llvm.aarch64.neon.abs.v8i16"
1577-
)]
1578-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
1579-
fn _vabsq_s16(a: int16x8_t) -> int16x8_t;
1561+
unsafe {
1562+
let neg: int16x8_t = simd_neg(a);
1563+
let mask: int16x8_t = simd_ge(a, neg);
1564+
simd_select(mask, a, neg)
15801565
}
1581-
unsafe { _vabsq_s16(a) }
15821566
}
15831567
#[doc = "Absolute value (wrapping)."]
15841568
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s32)"]
@@ -1599,15 +1583,11 @@ pub fn vabsq_s16(a: int16x8_t) -> int16x8_t {
15991583
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
16001584
)]
16011585
pub fn vabs_s32(a: int32x2_t) -> int32x2_t {
1602-
unsafe extern "unadjusted" {
1603-
#[cfg_attr(
1604-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1605-
link_name = "llvm.aarch64.neon.abs.v2i32"
1606-
)]
1607-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
1608-
fn _vabs_s32(a: int32x2_t) -> int32x2_t;
1586+
unsafe {
1587+
let neg: int32x2_t = simd_neg(a);
1588+
let mask: int32x2_t = simd_ge(a, neg);
1589+
simd_select(mask, a, neg)
16091590
}
1610-
unsafe { _vabs_s32(a) }
16111591
}
16121592
#[doc = "Absolute value (wrapping)."]
16131593
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s32)"]
@@ -1628,15 +1608,11 @@ pub fn vabs_s32(a: int32x2_t) -> int32x2_t {
16281608
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
16291609
)]
16301610
pub fn vabsq_s32(a: int32x4_t) -> int32x4_t {
1631-
unsafe extern "unadjusted" {
1632-
#[cfg_attr(
1633-
any(target_arch = "aarch64", target_arch = "arm64ec"),
1634-
link_name = "llvm.aarch64.neon.abs.v4i32"
1635-
)]
1636-
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
1637-
fn _vabsq_s32(a: int32x4_t) -> int32x4_t;
1611+
unsafe {
1612+
let neg: int32x4_t = simd_neg(a);
1613+
let mask: int32x4_t = simd_ge(a, neg);
1614+
simd_select(mask, a, neg)
16381615
}
1639-
unsafe { _vabsq_s32(a) }
16401616
}
16411617
#[doc = "Floating-point absolute value"]
16421618
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsh_f16)"]

0 commit comments

Comments
 (0)