Skip to content

Commit e1932ff

Browse files
committed
[SveEmitter] Add builtins for ternary ops (fmla, fmad, etc)
This patch adds builtins for: - svmad, svmla, svmls, svmsb svnmad, svnmla, svnmls, svnmsb svmla_lane, svmls_lane These builtins come in several flavours: - Merge into first source vector (`_m`) - False lanes are undef (`_x`) - False lanes are zeroed (`_z`) And can also have `_n` to indicate the last operand is a scalar. For example: svint32_t svmla[_n_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D78960
1 parent 22fdbd0 commit e1932ff

File tree

9 files changed

+3401
-1
lines changed

9 files changed

+3401
-1
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,23 @@ defm SVMULH_S : SInstZPZZ<"svmulh", "csil", "aarch64_sve_smulh">;
609609
defm SVMULH_U : SInstZPZZ<"svmulh", "UcUsUiUl", "aarch64_sve_umulh">;
610610
defm SVSUB : SInstZPZZ<"svsub", "csilUcUsUiUl", "aarch64_sve_sub">;
611611
defm SVSUBR : SInstZPZZ<"svsubr", "csilUcUsUiUl", "aarch64_sve_subr">;
612+
613+
//------------------------------------------------------------------------------
614+
615+
multiclass SInstZPZZZ<string name, string types, string intrinsic, list<FlagType> flags=[]> {
616+
def _M : SInst<name # "[_{d}]", "dPddd", types, MergeOp1, intrinsic, flags>;
617+
def _X : SInst<name # "[_{d}]", "dPddd", types, MergeAny, intrinsic, flags>;
618+
def _Z : SInst<name # "[_{d}]", "dPddd", types, MergeZero, intrinsic, flags>;
619+
620+
def _N_M : SInst<name # "[_n_{d}]", "dPdda", types, MergeOp1, intrinsic, flags>;
621+
def _N_X : SInst<name # "[_n_{d}]", "dPdda", types, MergeAny, intrinsic, flags>;
622+
def _N_Z : SInst<name # "[_n_{d}]", "dPdda", types, MergeZero, intrinsic, flags>;
623+
}
624+
625+
defm SVMAD : SInstZPZZZ<"svmad", "csilUcUsUiUl", "aarch64_sve_mad">;
626+
defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla">;
627+
defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls">;
628+
defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb">;
612629
////////////////////////////////////////////////////////////////////////////////
613630
// Permutations and selection
614631
def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;
@@ -696,9 +713,19 @@ defm SVNEG_F : SInstZPZ<"svneg", "hfd", "aarch64_sve_fneg">;
696713

697714
def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
698715

699-
def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
716+
defm SVMAD_F : SInstZPZZZ<"svmad", "hfd", "aarch64_sve_fmad">;
717+
defm SVMLA_F : SInstZPZZZ<"svmla", "hfd", "aarch64_sve_fmla">;
718+
defm SVMLS_F : SInstZPZZZ<"svmls", "hfd", "aarch64_sve_fmls">;
719+
defm SVMSB_F : SInstZPZZZ<"svmsb", "hfd", "aarch64_sve_fmsb">;
720+
defm SVNMAD_F : SInstZPZZZ<"svnmad", "hfd", "aarch64_sve_fnmad">;
721+
defm SVNMLA_F : SInstZPZZZ<"svnmla", "hfd", "aarch64_sve_fnmla">;
722+
defm SVNMLS_F : SInstZPZZZ<"svnmls", "hfd", "aarch64_sve_fnmls">;
723+
defm SVNMSB_F : SInstZPZZZ<"svnmsb", "hfd", "aarch64_sve_fnmsb">;
724+
700725
def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>,
701726
ImmCheck<4, ImmCheckComplexRotAll90>]>;
727+
def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
728+
def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
702729

703730
////////////////////////////////////////////////////////////////////////////////
704731
// Floating-point comparisons

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c

Lines changed: 646 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c

Lines changed: 635 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c

Lines changed: 694 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c

Lines changed: 646 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple used,unused... macro, long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
11+
#endif
12+
13+
svfloat16_t test_svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
14+
{
15+
// CHECK-LABEL: test_svnmad_f16_z
16+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
17+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
18+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3)
19+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
20+
return SVE_ACLE_FUNC(svnmad,_f16,_z,)(pg, op1, op2, op3);
21+
}
22+
23+
svfloat32_t test_svnmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
24+
{
25+
// CHECK-LABEL: test_svnmad_f32_z
26+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
27+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> zeroinitializer)
28+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %[[SEL]], <vscale x 4 x float> %op2, <vscale x 4 x float> %op3)
29+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
30+
return SVE_ACLE_FUNC(svnmad,_f32,_z,)(pg, op1, op2, op3);
31+
}
32+
33+
svfloat64_t test_svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
34+
{
35+
// CHECK-LABEL: test_svnmad_f64_z
36+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
37+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> zeroinitializer)
38+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %[[SEL]], <vscale x 2 x double> %op2, <vscale x 2 x double> %op3)
39+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
40+
return SVE_ACLE_FUNC(svnmad,_f64,_z,)(pg, op1, op2, op3);
41+
}
42+
43+
svfloat16_t test_svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
44+
{
45+
// CHECK-LABEL: test_svnmad_f16_m
46+
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
47+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3)
48+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
49+
return SVE_ACLE_FUNC(svnmad,_f16,_m,)(pg, op1, op2, op3);
50+
}
51+
52+
svfloat32_t test_svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
53+
{
54+
// CHECK-LABEL: test_svnmad_f32_m
55+
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
56+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3)
57+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
58+
return SVE_ACLE_FUNC(svnmad,_f32,_m,)(pg, op1, op2, op3);
59+
}
60+
61+
svfloat64_t test_svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
62+
{
63+
// CHECK-LABEL: test_svnmad_f64_m
64+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
65+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3)
66+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
67+
return SVE_ACLE_FUNC(svnmad,_f64,_m,)(pg, op1, op2, op3);
68+
}
69+
70+
svfloat16_t test_svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
71+
{
72+
// CHECK-LABEL: test_svnmad_f16_x
73+
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
74+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3)
75+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
76+
return SVE_ACLE_FUNC(svnmad,_f16,_x,)(pg, op1, op2, op3);
77+
}
78+
79+
svfloat32_t test_svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
80+
{
81+
// CHECK-LABEL: test_svnmad_f32_x
82+
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
83+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3)
84+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
85+
return SVE_ACLE_FUNC(svnmad,_f32,_x,)(pg, op1, op2, op3);
86+
}
87+
88+
svfloat64_t test_svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
89+
{
90+
// CHECK-LABEL: test_svnmad_f64_x
91+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
92+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3)
93+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
94+
return SVE_ACLE_FUNC(svnmad,_f64,_x,)(pg, op1, op2, op3);
95+
}
96+
97+
svfloat16_t test_svnmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
98+
{
99+
// CHECK-LABEL: test_svnmad_n_f16_z
100+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
101+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %op3)
102+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
103+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %[[DUP]])
104+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
105+
return SVE_ACLE_FUNC(svnmad,_n_f16,_z,)(pg, op1, op2, op3);
106+
}
107+
108+
svfloat32_t test_svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
109+
{
110+
// CHECK-LABEL: test_svnmad_n_f32_z
111+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
112+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %op3)
113+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> zeroinitializer)
114+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %[[SEL]], <vscale x 4 x float> %op2, <vscale x 4 x float> %[[DUP]])
115+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
116+
return SVE_ACLE_FUNC(svnmad,_n_f32,_z,)(pg, op1, op2, op3);
117+
}
118+
119+
svfloat64_t test_svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
120+
{
121+
// CHECK-LABEL: test_svnmad_n_f64_z
122+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
123+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %op3)
124+
// CHECK-DAG: %[[SEL:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> zeroinitializer)
125+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %[[SEL]], <vscale x 2 x double> %op2, <vscale x 2 x double> %[[DUP]])
126+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
127+
return SVE_ACLE_FUNC(svnmad,_n_f64,_z,)(pg, op1, op2, op3);
128+
}
129+
130+
svfloat16_t test_svnmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
131+
{
132+
// CHECK-LABEL: test_svnmad_n_f16_m
133+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
134+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %op3)
135+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %[[DUP]])
136+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
137+
return SVE_ACLE_FUNC(svnmad,_n_f16,_m,)(pg, op1, op2, op3);
138+
}
139+
140+
svfloat32_t test_svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
141+
{
142+
// CHECK-LABEL: test_svnmad_n_f32_m
143+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
144+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %op3)
145+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %[[DUP]])
146+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
147+
return SVE_ACLE_FUNC(svnmad,_n_f32,_m,)(pg, op1, op2, op3);
148+
}
149+
150+
svfloat64_t test_svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
151+
{
152+
// CHECK-LABEL: test_svnmad_n_f64_m
153+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
154+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %op3)
155+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %[[DUP]])
156+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
157+
return SVE_ACLE_FUNC(svnmad,_n_f64,_m,)(pg, op1, op2, op3);
158+
}
159+
160+
svfloat16_t test_svnmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
161+
{
162+
// CHECK-LABEL: test_svnmad_n_f16_x
163+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
164+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %op3)
165+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %[[DUP]])
166+
// CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
167+
return SVE_ACLE_FUNC(svnmad,_n_f16,_x,)(pg, op1, op2, op3);
168+
}
169+
170+
svfloat32_t test_svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
171+
{
172+
// CHECK-LABEL: test_svnmad_n_f32_x
173+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
174+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %op3)
175+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %[[DUP]])
176+
// CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
177+
return SVE_ACLE_FUNC(svnmad,_n_f32,_x,)(pg, op1, op2, op3);
178+
}
179+
180+
svfloat64_t test_svnmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
181+
{
182+
// CHECK-LABEL: test_svnmad_n_f64_x
183+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
184+
// CHECK-DAG: %[[DUP:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %op3)
185+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %[[DUP]])
186+
// CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
187+
return SVE_ACLE_FUNC(svnmad,_n_f64,_x,)(pg, op1, op2, op3);
188+
}

0 commit comments

Comments
 (0)