-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AArch64] Enable svcompact intrinsic in streaming mode with SME2.2 #151703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: Amina Chabane (Amichaxx) Changes: When the target enables +sme2p2, the svcompact intrinsic is now available in streaming SVE mode. Amended existing LLVM IR tests sve-intrinsics-perm-select.ll and sve2p2-intrinsics.ll. Included Sema test acle_sve_compact.cpp. Full diff: https://github.com/llvm/llvm-project/pull/151703.diff — 4 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 76fd072a41d8b..3d28e4708cf76 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1041,8 +1041,8 @@ defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">;
defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;
-let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in {
-def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;
+let SVETargetGuard = "sve", SMETargetGuard = "sme2p2" in {
+def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>;
}
// Note: svdup_lane is implemented using the intrinsic for TBL to represent a
diff --git a/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_compact.cpp b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_compact.cpp
new file mode 100644
index 0000000000000..b301e1622f326
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_compact.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +sve -target-feature +sme -target-feature +ssve -target-feature +sme2p2 \
+// RUN: -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+// expected-no-diagnostics
+
+
+#include <arm_sve.h>
+
+void test_svcompact_streaming(svbool_t pg, svfloat32_t op) __arm_streaming {
+ svcompact(pg, op);
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index ef31badb5e1f5..573c5af4a872b 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2,+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SSVE
;
; CLASTA (Vectors)
@@ -586,6 +587,14 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; SVE2-NEXT: add z1.d, z1.d, #8 // =0x8
; SVE2-NEXT: tbl z0.d, { z0.d }, z1.d
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: dupq_i64_range:
+; SSVE: // %bb.0:
+; SSVE-NEXT: index z1.d, #0, #1
+; SSVE-NEXT: and z1.d, z1.d, #0x1
+; SSVE-NEXT: add z1.d, z1.d, #8 // =0x8
+; SSVE-NEXT: tbl z0.d, { z0.d }, z1.d
+; SSVE-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
ret <vscale x 2 x i64> %out
}
@@ -594,13 +603,29 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
;
define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
-; CHECK-LABEL: dupq_f32_repeat_complex:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-NEXT: mov z0.d, d0
-; CHECK-NEXT: ret
+; SVE-LABEL: dupq_f32_repeat_complex:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $s0 killed $s0 def $z0
+; SVE-NEXT: // kill: def $s1 killed $s1 def $q1
+; SVE-NEXT: mov v0.s[1], v1.s[0]
+; SVE-NEXT: mov z0.d, d0
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: dupq_f32_repeat_complex:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $s0 killed $s0 def $z0
+; SVE2-NEXT: // kill: def $s1 killed $s1 def $q1
+; SVE2-NEXT: mov v0.s[1], v1.s[0]
+; SVE2-NEXT: mov z0.d, d0
+; SVE2-NEXT: ret
+;
+; SSVE-LABEL: dupq_f32_repeat_complex:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $s0 killed $s0 def $z0
+; SSVE-NEXT: // kill: def $s1 killed $s1 def $z1
+; SSVE-NEXT: zip1 z0.s, z0.s, z1.s
+; SSVE-NEXT: mov z0.d, d0
+; SSVE-NEXT: ret
%1 = insertelement <4 x float> poison, float %x, i64 0
%2 = insertelement <4 x float> %1, float %y, i64 1
%3 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %2, i64 0)
@@ -611,13 +636,29 @@ define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %
}
define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
-; CHECK-LABEL: dupq_f16_repeat_complex:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
-; CHECK-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: ret
+; SVE-LABEL: dupq_f16_repeat_complex:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $h0 killed $h0 def $z0
+; SVE-NEXT: // kill: def $h1 killed $h1 def $q1
+; SVE-NEXT: mov v0.h[1], v1.h[0]
+; SVE-NEXT: mov z0.s, s0
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: dupq_f16_repeat_complex:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $h0 killed $h0 def $z0
+; SVE2-NEXT: // kill: def $h1 killed $h1 def $q1
+; SVE2-NEXT: mov v0.h[1], v1.h[0]
+; SVE2-NEXT: mov z0.s, s0
+; SVE2-NEXT: ret
+;
+; SSVE-LABEL: dupq_f16_repeat_complex:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $h0 killed $h0 def $z0
+; SSVE-NEXT: // kill: def $h1 killed $h1 def $z1
+; SSVE-NEXT: zip1 z0.h, z0.h, z1.h
+; SSVE-NEXT: mov z0.s, s0
+; SSVE-NEXT: ret
%1 = insertelement <8 x half> poison, half %x, i64 0
%2 = insertelement <8 x half> %1, half %y, i64 1
%3 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %2, i64 0)
@@ -639,6 +680,13 @@ define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #255
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_i8:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #255
+; SSVE-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b,
i32 255)
@@ -657,6 +705,13 @@ define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #0
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_i16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #0
+; SSVE-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b,
i32 0)
@@ -675,6 +730,13 @@ define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_i32:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #4
+; SSVE-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b,
i32 1)
@@ -693,6 +755,13 @@ define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_i64:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #16
+; SSVE-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b,
i32 2)
@@ -711,6 +780,13 @@ define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x b
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_bf16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #6
+; SSVE-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
<vscale x 8 x bfloat> %b,
i32 3)
@@ -729,6 +805,13 @@ define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_f16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #6
+; SSVE-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
i32 3)
@@ -747,6 +830,13 @@ define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x floa
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_f32:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #16
+; SSVE-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
i32 4)
@@ -765,6 +855,13 @@ define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x do
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #40
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: ext_f64:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: ext z0.b, { z0.b, z1.b }, #40
+; SSVE-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
i32 5)
@@ -1158,6 +1255,13 @@ define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.b, p0, { z0.b, z1.b }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_i8:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.b, p0, { z0.b, z1.b }
+; SSVE-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
@@ -1176,6 +1280,13 @@ define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_i16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; SSVE-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
@@ -1194,6 +1305,13 @@ define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_i32:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.s, p0, { z0.s, z1.s }
+; SSVE-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
@@ -1212,6 +1330,13 @@ define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_i64:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.d, p0, { z0.d, z1.d }
+; SSVE-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
@@ -1230,6 +1355,13 @@ define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_bf16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; SSVE-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
<vscale x 8 x bfloat> %a,
<vscale x 8 x bfloat> %b)
@@ -1248,6 +1380,13 @@ define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_f16:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; SSVE-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b)
@@ -1266,6 +1405,13 @@ define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_f32:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.s, p0, { z0.s, z1.s }
+; SSVE-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b)
@@ -1284,6 +1430,13 @@ define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT: splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT: ret
+;
+; SSVE-LABEL: splice_f64:
+; SSVE: // %bb.0:
+; SSVE-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SSVE-NEXT: splice z0.d, p0, { z0.d, z1.d }
+; SSVE-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
index 6017070b114a5..464cdd6605b0e 100644
--- a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p2 < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2,+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
;
; COMPACT
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you confirm you're intentionally ignoring the byte and halfword variants of compact that are also available with +sme2p2? This works for me, I just want to make sure I'm reviewing the PR properly.
No, it was an error on my part. If it's okay with you, I want to make that change.
@paulwalker-arm I asked about the byte and halfword variants just to be sure, and I believe they are still awaiting approval, so I will be focusing on just the word and doubleword variants in this patch. Thanks.
3b5e9b6
to
e01d9a3
Compare
…e in streaming mode. Included Sema test acle_sve_compact.cpp. - Updated arm_sve.td - Updated Sema test - Removed changes to .ll tests
e01d9a3
to
ca704a5
Compare
When the target enables +sme2p2, the svcompact intrinsic is now available in streaming SVE mode, through updating the guards in arm_sve.td. Included Sema test acle_sve_compact.cpp.