Skip to content

Commit 50ccf2c

Browse files
committed
[RISCV] custom scmp(x,0) and scmp(0,x) lowering for RVV
The current codegen for scmp(x,0) and scmp(0,x), also known as sign(x) and -sign(x), isn't optimal for RVV. It produces a four-instruction sequence vmsgt.vi + vmslt.vi + vmerge.vim + vmerge.vim for SEW<=32 and three instructions for SEW=64: scmp(0,x): vmsgt.vi + vsra.vx + vor.vi; scmp(x,0): vmsgt.vi + vsrl.vx + vmerge.vim. This patch introduces a new lowering for all values of SEW which expresses the above in SelectionDAG nodes. This maps to two arithmetic instructions and a vector register move: scmp(0,x): vmv.v.i/v + vmsgt.vi + masked vsra.vi/vx; scmp(x,0): vmv.v.i/v + vmsgt.vi + masked vsrl.vi/vx. These clobber v0, need to have a different destination than the input, and need to use an additional GPR for SEW=64. For the SEW<=32 scmp(x,0) case a slightly different lowering was chosen: scmp(x,0): vmin.vx + vsra.vi + vor.vv. This doesn't clobber v0, but uses a single GPR. We deemed using a single GPR slightly better than clobbering v0 (SEW<=32), but using two GPRs worse than using one GPR and clobbering v0.
1 parent de5c1c9 commit 50ccf2c

File tree

3 files changed

+162
-149
lines changed

3 files changed

+162
-149
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
880880
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
881881
Legal);
882882

883+
setOperationAction(ISD::SCMP, VT, Custom);
883884
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
884885

885886
// Custom-lower extensions and truncations from/to mask types.
@@ -1361,6 +1362,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13611362
setOperationAction(
13621363
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
13631364

1365+
setOperationAction(ISD::SCMP, VT, Custom);
13641366
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
13651367

13661368
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
@@ -8223,6 +8225,36 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
82238225
case ISD::SADDSAT:
82248226
case ISD::SSUBSAT:
82258227
return lowerToScalableOp(Op, DAG);
8228+
case ISD::SCMP: {
8229+
SDLoc DL(Op);
8230+
EVT VT = Op->getValueType(0);
8231+
SDValue LHS = Op->getOperand(0);
8232+
SDValue RHS = Op->getOperand(1);
8233+
unsigned SEW = VT.getScalarSizeInBits();
8234+
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8235+
8236+
SDValue Shift = DAG.getConstant(SEW - 1, DL, VT);
8237+
SDValue Zero = DAG.getConstant(0, DL, VT);
8238+
SDValue One = DAG.getConstant(1, DL, VT);
8239+
SDValue MinusOne = DAG.getAllOnesConstant(DL, VT);
8240+
8241+
if (ISD::isConstantSplatVectorAllZeros(RHS.getNode())) {
8242+
// scmp(lhs, 0) -> vor.vv(vsra.vi/vx(lhs,SEW-1), vmin.vx(lhs,1))
8243+
LHS = DAG.getFreeze(LHS);
8244+
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, LHS, Shift);
8245+
SDValue Min = DAG.getNode(ISD::SMIN, DL, VT, LHS, One);
8246+
return DAG.getNode(ISD::OR, DL, VT, Sra, Min);
8247+
}
8248+
if (ISD::isConstantSplatVectorAllZeros(LHS.getNode())) {
8249+
// scmp(0, rhs) -> vmerge.vi(vmsle.vi(rhs,0), vsrl.vi/vx(rhs,SEW-1), -1)
8250+
RHS = DAG.getFreeze(RHS);
8251+
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, RHS, Shift);
8252+
SDValue Setcc = DAG.getSetCC(DL, CCVT, RHS, Zero, ISD::SETLE);
8253+
return DAG.getSelect(DL, VT, Setcc, Srl, MinusOne);
8254+
}
8255+
8256+
return SDValue();
8257+
}
82268258
case ISD::ABDS:
82278259
case ISD::ABDU: {
82288260
SDLoc dl(Op);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scmp.ll

Lines changed: 75 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -4,186 +4,185 @@
44

55
define <16 x i8> @scmp_i8i8(<16 x i8> %a, <16 x i8> %b) {
66
; CHECK-LABEL: scmp_i8i8:
7-
; CHECK: # %bb.0: # %entry
7+
; CHECK: # %bb.0:
88
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
99
; CHECK-NEXT: vmslt.vv v0, v9, v8
1010
; CHECK-NEXT: vmv.v.i v10, 0
1111
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
1212
; CHECK-NEXT: vmslt.vv v0, v8, v9
1313
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
1414
; CHECK-NEXT: ret
15-
entry:
1615
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> %b)
1716
ret <16 x i8> %c
1817
}
1918

2019
define <16 x i8> @scmp_z8i8(<16 x i8> %a) {
2120
; CHECK-LABEL: scmp_z8i8:
22-
; CHECK: # %bb.0: # %entry
23-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
24-
; CHECK-NEXT: vmsle.vi v0, v8, -1
25-
; CHECK-NEXT: vmv.v.i v9, 0
26-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
27-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
28-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
23+
; CHECK-NEXT: vmsle.vi v0, v8, 0
24+
; CHECK-NEXT: vmv.v.i v9, -1
25+
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
26+
; CHECK-NEXT: vmv.v.v v8, v9
2927
; CHECK-NEXT: ret
30-
entry:
3128
%c = call <16 x i8> @llvm.scmp(<16 x i8> zeroinitializer, <16 x i8> %a)
3229
ret <16 x i8> %c
3330
}
3431

3532
define <16 x i8> @scmp_i8z8(<16 x i8> %a) {
3633
; CHECK-LABEL: scmp_i8z8:
37-
; CHECK: # %bb.0: # %entry
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: li a0, 1
3836
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
39-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
40-
; CHECK-NEXT: vmv.v.i v9, 0
41-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
42-
; CHECK-NEXT: vmsle.vi v0, v8, -1
43-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
37+
; CHECK-NEXT: vmin.vx v9, v8, a0
38+
; CHECK-NEXT: vsra.vi v8, v8, 7
39+
; CHECK-NEXT: vor.vv v8, v8, v9
4440
; CHECK-NEXT: ret
45-
entry:
4641
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> zeroinitializer)
4742
ret <16 x i8> %c
4843
}
4944

5045

5146
define <8 x i16> @scmp_i16i16(<8 x i16> %a, <8 x i16> %b) {
5247
; CHECK-LABEL: scmp_i16i16:
53-
; CHECK: # %bb.0: # %entry
48+
; CHECK: # %bb.0:
5449
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5550
; CHECK-NEXT: vmslt.vv v0, v9, v8
5651
; CHECK-NEXT: vmv.v.i v10, 0
5752
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
5853
; CHECK-NEXT: vmslt.vv v0, v8, v9
5954
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
6055
; CHECK-NEXT: ret
61-
entry:
6256
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> %b)
6357
ret <8 x i16> %c
6458
}
6559

6660
define <8 x i16> @scmp_z16i16(<8 x i16> %a) {
6761
; CHECK-LABEL: scmp_z16i16:
68-
; CHECK: # %bb.0: # %entry
69-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
70-
; CHECK-NEXT: vmsle.vi v0, v8, -1
71-
; CHECK-NEXT: vmv.v.i v9, 0
72-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
73-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
74-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
64+
; CHECK-NEXT: vmsle.vi v0, v8, 0
65+
; CHECK-NEXT: vmv.v.i v9, -1
66+
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
67+
; CHECK-NEXT: vmv.v.v v8, v9
7568
; CHECK-NEXT: ret
76-
entry:
7769
%c = call <8 x i16> @llvm.scmp(<8 x i16> zeroinitializer, <8 x i16> %a)
7870
ret <8 x i16> %c
7971
}
8072

8173
define <8 x i16> @scmp_i16z16(<8 x i16> %a) {
8274
; CHECK-LABEL: scmp_i16z16:
83-
; CHECK: # %bb.0: # %entry
75+
; CHECK: # %bb.0:
76+
; CHECK-NEXT: li a0, 1
8477
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
85-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
86-
; CHECK-NEXT: vmv.v.i v9, 0
87-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
88-
; CHECK-NEXT: vmsle.vi v0, v8, -1
89-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
78+
; CHECK-NEXT: vmin.vx v9, v8, a0
79+
; CHECK-NEXT: vsra.vi v8, v8, 15
80+
; CHECK-NEXT: vor.vv v8, v8, v9
9081
; CHECK-NEXT: ret
91-
entry:
9282
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> zeroinitializer)
9383
ret <8 x i16> %c
9484
}
9585

9686

9787
define <4 x i32> @scmp_i32i32(<4 x i32> %a, <4 x i32> %b) {
9888
; CHECK-LABEL: scmp_i32i32:
99-
; CHECK: # %bb.0: # %entry
89+
; CHECK: # %bb.0:
10090
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
10191
; CHECK-NEXT: vmslt.vv v0, v9, v8
10292
; CHECK-NEXT: vmv.v.i v10, 0
10393
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
10494
; CHECK-NEXT: vmslt.vv v0, v8, v9
10595
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
10696
; CHECK-NEXT: ret
107-
entry:
10897
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> %b)
10998
ret <4 x i32> %c
11099
}
111100

112101
define <4 x i32> @scmp_z32i32(<4 x i32> %a) {
113102
; CHECK-LABEL: scmp_z32i32:
114-
; CHECK: # %bb.0: # %entry
115-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
116-
; CHECK-NEXT: vmsle.vi v0, v8, -1
117-
; CHECK-NEXT: vmv.v.i v9, 0
118-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
119-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
120-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
103+
; CHECK: # %bb.0:
104+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
105+
; CHECK-NEXT: vmsle.vi v0, v8, 0
106+
; CHECK-NEXT: vmv.v.i v9, -1
107+
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
108+
; CHECK-NEXT: vmv.v.v v8, v9
121109
; CHECK-NEXT: ret
122-
entry:
123110
%c = call <4 x i32> @llvm.scmp(<4 x i32> zeroinitializer, <4 x i32> %a)
124111
ret <4 x i32> %c
125112
}
126113

127114
define <4 x i32> @scmp_i32z32(<4 x i32> %a) {
128115
; CHECK-LABEL: scmp_i32z32:
129-
; CHECK: # %bb.0: # %entry
116+
; CHECK: # %bb.0:
117+
; CHECK-NEXT: li a0, 1
130118
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
131-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
132-
; CHECK-NEXT: vmv.v.i v9, 0
133-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
134-
; CHECK-NEXT: vmsle.vi v0, v8, -1
135-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
119+
; CHECK-NEXT: vmin.vx v9, v8, a0
120+
; CHECK-NEXT: vsra.vi v8, v8, 31
121+
; CHECK-NEXT: vor.vv v8, v8, v9
136122
; CHECK-NEXT: ret
137-
entry:
138123
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> zeroinitializer)
139124
ret <4 x i32> %c
140125
}
141126

142127

143128
define <2 x i64> @scmp_i64i64(<2 x i64> %a, <2 x i64> %b) {
144129
; CHECK-LABEL: scmp_i64i64:
145-
; CHECK: # %bb.0: # %entry
130+
; CHECK: # %bb.0:
146131
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
147132
; CHECK-NEXT: vmslt.vv v0, v9, v8
148133
; CHECK-NEXT: vmv.v.i v10, 0
149134
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
150135
; CHECK-NEXT: vmslt.vv v0, v8, v9
151136
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
152137
; CHECK-NEXT: ret
153-
entry:
154138
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> %b)
155139
ret <2 x i64> %c
156140
}
157141

158142
define <2 x i64> @scmp_z64i64(<2 x i64> %a) {
159-
; CHECK-LABEL: scmp_z64i64:
160-
; CHECK: # %bb.0: # %entry
161-
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
162-
; CHECK-NEXT: vmsle.vi v0, v8, -1
163-
; CHECK-NEXT: vmv.v.i v9, 0
164-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
165-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
166-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
167-
; CHECK-NEXT: ret
168-
entry:
143+
; RV32-LABEL: scmp_z64i64:
144+
; RV32: # %bb.0:
145+
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
146+
; RV32-NEXT: vmsle.vi v0, v8, -1
147+
; RV32-NEXT: vmv.v.i v9, 0
148+
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
149+
; RV32-NEXT: vmsgt.vi v0, v8, 0
150+
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
151+
; RV32-NEXT: ret
152+
;
153+
; RV64-LABEL: scmp_z64i64:
154+
; RV64: # %bb.0:
155+
; RV64-NEXT: li a0, 63
156+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
157+
; RV64-NEXT: vmsle.vi v0, v8, 0
158+
; RV64-NEXT: vmv.v.i v9, -1
159+
; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
160+
; RV64-NEXT: vmv.v.v v8, v9
161+
; RV64-NEXT: ret
169162
%c = call <2 x i64> @llvm.scmp(<2 x i64> zeroinitializer, <2 x i64> %a)
170163
ret <2 x i64> %c
171164
}
172165

173166
define <2 x i64> @scmp_i64z64(<2 x i64> %a) {
174-
; CHECK-LABEL: scmp_i64z64:
175-
; CHECK: # %bb.0: # %entry
176-
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
177-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
178-
; CHECK-NEXT: vmv.v.i v9, 0
179-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
180-
; CHECK-NEXT: vmsle.vi v0, v8, -1
181-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
182-
; CHECK-NEXT: ret
183-
entry:
167+
; RV32-LABEL: scmp_i64z64:
168+
; RV32: # %bb.0:
169+
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
170+
; RV32-NEXT: vmsgt.vi v0, v8, 0
171+
; RV32-NEXT: vmv.v.i v9, 0
172+
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
173+
; RV32-NEXT: vmsle.vi v0, v8, -1
174+
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
175+
; RV32-NEXT: ret
176+
;
177+
; RV64-LABEL: scmp_i64z64:
178+
; RV64: # %bb.0:
179+
; RV64-NEXT: li a0, 1
180+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
181+
; RV64-NEXT: vmin.vx v9, v8, a0
182+
; RV64-NEXT: li a0, 63
183+
; RV64-NEXT: vsra.vx v8, v8, a0
184+
; RV64-NEXT: vor.vv v8, v8, v9
185+
; RV64-NEXT: ret
184186
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> zeroinitializer)
185187
ret <2 x i64> %c
186188
}
187-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
188-
; RV32: {{.*}}
189-
; RV64: {{.*}}

0 commit comments

Comments
 (0)