Skip to content

Commit ca76290

Browse files
committed
[RISCV] custom scmp(x,0) and scmp(0,x) lowering for RVV
The current codegen for scmp(x,0) and scmp(0,x), also known as sign(x) and -sign(x), isn't optimal for RVV. It produces a four-instruction sequence of vmsgt.vi + vmsle.vi + vmerge.vim + vmerge.vim for SEW<=32, and three instructions for SEW=64: scmp(0,x): vmsgt.vi + vsrl.vx + vmerge.vim; scmp(x,0): vmsgt.vi + vsra.vx + vor.vi. This patch introduces a new lowering for all values of SEW which expresses the above in SelectionDAG nodes. This maps to two arithmetic instructions and a vector register move: scmp(0,x): vmv.v.i/v + vmsgt.vi + masked vsrl.vi/vx; scmp(x,0): vmv.v.i/v + vmsgt.vi + masked vsra.vi/vx. These clobber v0, need to have a different destination than the input, and need to use an additional GPR for SEW=64. For the SEW<=32 scmp(x,0) case a slightly different lowering was chosen: scmp(x,0): vmin.vx + vsra.vi + vor.vv. This doesn't clobber v0, but uses a single GPR. We deemed using a single GPR slightly better than clobbering v0 (SEW<=32), but using two GPRs worse than using one GPR and clobbering v0.
1 parent de5c1c9 commit ca76290

File tree

3 files changed

+135
-94
lines changed

3 files changed

+135
-94
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
880880
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
881881
Legal);
882882

883+
setOperationAction(ISD::SCMP, VT, Custom);
883884
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
884885

885886
// Custom-lower extensions and truncations from/to mask types.
@@ -1361,6 +1362,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13611362
setOperationAction(
13621363
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
13631364

1365+
setOperationAction(ISD::SCMP, VT, Custom);
13641366
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
13651367

13661368
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
@@ -8223,6 +8225,40 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
82238225
case ISD::SADDSAT:
82248226
case ISD::SSUBSAT:
82258227
return lowerToScalableOp(Op, DAG);
8228+
case ISD::SCMP: {
8229+
SDLoc DL(Op);
8230+
EVT VT = Op->getValueType(0);
8231+
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8232+
8233+
SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8234+
SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8235+
unsigned SEW = VT.getScalarSizeInBits();
8236+
8237+
SDValue Shift = DAG.getConstant(SEW-1, DL, VT);
8238+
SDValue Zero = DAG.getConstant(0, DL, VT);
8239+
SDValue One = DAG.getConstant(1, DL, VT);
8240+
SDValue MinusOne = DAG.getAllOnesConstant(DL, VT);
8241+
8242+
if (ISD::isConstantSplatVectorAllZeros(RHS.getNode())) {
8243+
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, LHS, Shift);
8244+
if (SEW <= 32) {
8245+
// scmp(lhs, 0) -> vor.vv(vsra.vi(lhs,SEW-1), vmin.vx(lhs,1))
8246+
SDValue Min = DAG.getNode(ISD::SMIN, DL, VT, LHS, One);
8247+
return DAG.getNode(ISD::OR, DL, VT, Sra, Min);
8248+
}
8249+
// scmp(lhs, 0) -> vmerge.vi(vmsgt.vi(rhs,0), vsra.vx(lhs,SEW-1), 1)
8250+
SDValue Setcc = DAG.getSetCC(DL, CCVT, LHS, Zero, ISD::SETGT);
8251+
return DAG.getSelect(DL, VT, Setcc, Sra, One);
8252+
}
8253+
if (ISD::isConstantSplatVectorAllZeros(LHS.getNode())) {
8254+
// scmp(0, rhs) -> vmerge.vi(vmsgt.vi(rhs,0), vsrl.vi/vx(rhs,SEW-1), -1)
8255+
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, RHS, Shift);
8256+
SDValue Setcc = DAG.getSetCC(DL, CCVT, RHS, Zero, ISD::SETGT);
8257+
return DAG.getSelect(DL, VT, Setcc, Srl, MinusOne);
8258+
}
8259+
8260+
return SDValue();
8261+
}
82268262
case ISD::ABDS:
82278263
case ISD::ABDU: {
82288264
SDLoc dl(Op);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scmp.ll

Lines changed: 62 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@ entry:
2020
define <16 x i8> @scmp_z8i8(<16 x i8> %a) {
2121
; CHECK-LABEL: scmp_z8i8:
2222
; CHECK: # %bb.0: # %entry
23-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
24-
; CHECK-NEXT: vmsle.vi v0, v8, -1
25-
; CHECK-NEXT: vmv.v.i v9, 0
26-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
23+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
2724
; CHECK-NEXT: vmsgt.vi v0, v8, 0
28-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
25+
; CHECK-NEXT: vmv.v.i v9, -1
26+
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
27+
; CHECK-NEXT: vmv.v.v v8, v9
2928
; CHECK-NEXT: ret
3029
entry:
3130
%c = call <16 x i8> @llvm.scmp(<16 x i8> zeroinitializer, <16 x i8> %a)
@@ -35,12 +34,11 @@ entry:
3534
define <16 x i8> @scmp_i8z8(<16 x i8> %a) {
3635
; CHECK-LABEL: scmp_i8z8:
3736
; CHECK: # %bb.0: # %entry
37+
; CHECK-NEXT: li a0, 1
3838
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
39-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
40-
; CHECK-NEXT: vmv.v.i v9, 0
41-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
42-
; CHECK-NEXT: vmsle.vi v0, v8, -1
43-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
39+
; CHECK-NEXT: vmin.vx v9, v8, a0
40+
; CHECK-NEXT: vsra.vi v8, v8, 7
41+
; CHECK-NEXT: vor.vv v8, v8, v9
4442
; CHECK-NEXT: ret
4543
entry:
4644
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> zeroinitializer)
@@ -66,12 +64,11 @@ entry:
6664
define <8 x i16> @scmp_z16i16(<8 x i16> %a) {
6765
; CHECK-LABEL: scmp_z16i16:
6866
; CHECK: # %bb.0: # %entry
69-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
70-
; CHECK-NEXT: vmsle.vi v0, v8, -1
71-
; CHECK-NEXT: vmv.v.i v9, 0
72-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
67+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7368
; CHECK-NEXT: vmsgt.vi v0, v8, 0
74-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
69+
; CHECK-NEXT: vmv.v.i v9, -1
70+
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
71+
; CHECK-NEXT: vmv.v.v v8, v9
7572
; CHECK-NEXT: ret
7673
entry:
7774
%c = call <8 x i16> @llvm.scmp(<8 x i16> zeroinitializer, <8 x i16> %a)
@@ -81,12 +78,11 @@ entry:
8178
define <8 x i16> @scmp_i16z16(<8 x i16> %a) {
8279
; CHECK-LABEL: scmp_i16z16:
8380
; CHECK: # %bb.0: # %entry
81+
; CHECK-NEXT: li a0, 1
8482
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
85-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
86-
; CHECK-NEXT: vmv.v.i v9, 0
87-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
88-
; CHECK-NEXT: vmsle.vi v0, v8, -1
89-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
83+
; CHECK-NEXT: vmin.vx v9, v8, a0
84+
; CHECK-NEXT: vsra.vi v8, v8, 15
85+
; CHECK-NEXT: vor.vv v8, v8, v9
9086
; CHECK-NEXT: ret
9187
entry:
9288
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> zeroinitializer)
@@ -112,12 +108,11 @@ entry:
112108
define <4 x i32> @scmp_z32i32(<4 x i32> %a) {
113109
; CHECK-LABEL: scmp_z32i32:
114110
; CHECK: # %bb.0: # %entry
115-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
116-
; CHECK-NEXT: vmsle.vi v0, v8, -1
117-
; CHECK-NEXT: vmv.v.i v9, 0
118-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
111+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
119112
; CHECK-NEXT: vmsgt.vi v0, v8, 0
120-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
113+
; CHECK-NEXT: vmv.v.i v9, -1
114+
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
115+
; CHECK-NEXT: vmv.v.v v8, v9
121116
; CHECK-NEXT: ret
122117
entry:
123118
%c = call <4 x i32> @llvm.scmp(<4 x i32> zeroinitializer, <4 x i32> %a)
@@ -127,12 +122,11 @@ entry:
127122
define <4 x i32> @scmp_i32z32(<4 x i32> %a) {
128123
; CHECK-LABEL: scmp_i32z32:
129124
; CHECK: # %bb.0: # %entry
125+
; CHECK-NEXT: li a0, 1
130126
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
131-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
132-
; CHECK-NEXT: vmv.v.i v9, 0
133-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
134-
; CHECK-NEXT: vmsle.vi v0, v8, -1
135-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
127+
; CHECK-NEXT: vmin.vx v9, v8, a0
128+
; CHECK-NEXT: vsra.vi v8, v8, 31
129+
; CHECK-NEXT: vor.vv v8, v8, v9
136130
; CHECK-NEXT: ret
137131
entry:
138132
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> zeroinitializer)
@@ -156,34 +150,51 @@ entry:
156150
}
157151

158152
define <2 x i64> @scmp_z64i64(<2 x i64> %a) {
159-
; CHECK-LABEL: scmp_z64i64:
160-
; CHECK: # %bb.0: # %entry
161-
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
162-
; CHECK-NEXT: vmsle.vi v0, v8, -1
163-
; CHECK-NEXT: vmv.v.i v9, 0
164-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
165-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
166-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
167-
; CHECK-NEXT: ret
153+
; RV32-LABEL: scmp_z64i64:
154+
; RV32: # %bb.0: # %entry
155+
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
156+
; RV32-NEXT: vmsle.vi v0, v8, -1
157+
; RV32-NEXT: vmv.v.i v9, 0
158+
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
159+
; RV32-NEXT: vmsgt.vi v0, v8, 0
160+
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
161+
; RV32-NEXT: ret
162+
;
163+
; RV64-LABEL: scmp_z64i64:
164+
; RV64: # %bb.0: # %entry
165+
; RV64-NEXT: li a0, 63
166+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
167+
; RV64-NEXT: vmsgt.vi v0, v8, 0
168+
; RV64-NEXT: vmv.v.i v9, -1
169+
; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
170+
; RV64-NEXT: vmv.v.v v8, v9
171+
; RV64-NEXT: ret
168172
entry:
169173
%c = call <2 x i64> @llvm.scmp(<2 x i64> zeroinitializer, <2 x i64> %a)
170174
ret <2 x i64> %c
171175
}
172176

173177
define <2 x i64> @scmp_i64z64(<2 x i64> %a) {
174-
; CHECK-LABEL: scmp_i64z64:
175-
; CHECK: # %bb.0: # %entry
176-
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
177-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
178-
; CHECK-NEXT: vmv.v.i v9, 0
179-
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
180-
; CHECK-NEXT: vmsle.vi v0, v8, -1
181-
; CHECK-NEXT: vmerge.vim v8, v9, -1, v0
182-
; CHECK-NEXT: ret
178+
; RV32-LABEL: scmp_i64z64:
179+
; RV32: # %bb.0: # %entry
180+
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
181+
; RV32-NEXT: vmsgt.vi v0, v8, 0
182+
; RV32-NEXT: vmv.v.i v9, 0
183+
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
184+
; RV32-NEXT: vmsle.vi v0, v8, -1
185+
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
186+
; RV32-NEXT: ret
187+
;
188+
; RV64-LABEL: scmp_i64z64:
189+
; RV64: # %bb.0: # %entry
190+
; RV64-NEXT: li a0, 63
191+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
192+
; RV64-NEXT: vmsgt.vi v0, v8, 0
193+
; RV64-NEXT: vmv.v.i v9, 1
194+
; RV64-NEXT: vsra.vx v9, v8, a0, v0.t
195+
; RV64-NEXT: vmv.v.v v8, v9
196+
; RV64-NEXT: ret
183197
entry:
184198
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> zeroinitializer)
185199
ret <2 x i64> %c
186200
}
187-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
188-
; RV32: {{.*}}
189-
; RV64: {{.*}}

llvm/test/CodeGen/RISCV/rvv/scmp.ll

Lines changed: 37 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ entry:
2020
define <vscale x 16 x i8> @scmp_z8i8(<vscale x 16 x i8> %a) {
2121
; CHECK-LABEL: scmp_z8i8:
2222
; CHECK: # %bb.0: # %entry
23-
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
23+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu
2424
; CHECK-NEXT: vmsgt.vi v0, v8, 0
25-
; CHECK-NEXT: vsrl.vi v8, v8, 7
26-
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
25+
; CHECK-NEXT: vmv.v.i v10, -1
26+
; CHECK-NEXT: vsrl.vi v10, v8, 7, v0.t
27+
; CHECK-NEXT: vmv.v.v v8, v10
2728
; CHECK-NEXT: ret
2829
entry:
2930
%c = call <vscale x 16 x i8> @llvm.scmp(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -33,12 +34,11 @@ entry:
3334
define <vscale x 16 x i8> @scmp_i8z8(<vscale x 16 x i8> %a) {
3435
; CHECK-LABEL: scmp_i8z8:
3536
; CHECK: # %bb.0: # %entry
36-
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
37-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
38-
; CHECK-NEXT: vmv.v.i v10, 0
39-
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
40-
; CHECK-NEXT: vmsle.vi v0, v8, -1
41-
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
37+
; CHECK-NEXT: li a0, 1
38+
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
39+
; CHECK-NEXT: vmin.vx v10, v8, a0
40+
; CHECK-NEXT: vsra.vi v8, v8, 7
41+
; CHECK-NEXT: vor.vv v8, v8, v10
4242
; CHECK-NEXT: ret
4343
entry:
4444
%c = call <vscale x 16 x i8> @llvm.scmp(<vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -64,10 +64,11 @@ entry:
6464
define <vscale x 8 x i16> @scmp_z16i16(<vscale x 8 x i16> %a) {
6565
; CHECK-LABEL: scmp_z16i16:
6666
; CHECK: # %bb.0: # %entry
67-
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
67+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
6868
; CHECK-NEXT: vmsgt.vi v0, v8, 0
69-
; CHECK-NEXT: vsrl.vi v8, v8, 15
70-
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
69+
; CHECK-NEXT: vmv.v.i v10, -1
70+
; CHECK-NEXT: vsrl.vi v10, v8, 15, v0.t
71+
; CHECK-NEXT: vmv.v.v v8, v10
7172
; CHECK-NEXT: ret
7273
entry:
7374
%c = call <vscale x 8 x i16> @llvm.scmp(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %a)
@@ -77,12 +78,11 @@ entry:
7778
define <vscale x 8 x i16> @scmp_i16z16(<vscale x 8 x i16> %a) {
7879
; CHECK-LABEL: scmp_i16z16:
7980
; CHECK: # %bb.0: # %entry
80-
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
81-
; CHECK-NEXT: vmsgt.vi v0, v8, 0
82-
; CHECK-NEXT: vmv.v.i v10, 0
83-
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
84-
; CHECK-NEXT: vmsle.vi v0, v8, -1
85-
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
81+
; CHECK-NEXT: li a0, 1
82+
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
83+
; CHECK-NEXT: vmin.vx v10, v8, a0
84+
; CHECK-NEXT: vsra.vi v8, v8, 15
85+
; CHECK-NEXT: vor.vv v8, v8, v10
8686
; CHECK-NEXT: ret
8787
entry:
8888
%c = call <vscale x 8 x i16> @llvm.scmp(<vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer)
@@ -108,34 +108,26 @@ entry:
108108
define <vscale x 4 x i32> @scmp_z32i32(<vscale x 4 x i32> %a) {
109109
; CHECK-LABEL: scmp_z32i32:
110110
; CHECK: # %bb.0: # %entry
111-
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
111+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
112112
; CHECK-NEXT: vmsgt.vi v0, v8, 0
113-
; CHECK-NEXT: vsrl.vi v8, v8, 31
114-
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
113+
; CHECK-NEXT: vmv.v.i v10, -1
114+
; CHECK-NEXT: vsrl.vi v10, v8, 31, v0.t
115+
; CHECK-NEXT: vmv.v.v v8, v10
115116
; CHECK-NEXT: ret
116117
entry:
117118
%c = call <vscale x 4 x i32> @llvm.scmp(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %a)
118119
ret <vscale x 4 x i32> %c
119120
}
120121

121122
define <vscale x 4 x i32> @scmp_i32z32(<vscale x 4 x i32> %a) {
122-
; RV32-LABEL: scmp_i32z32:
123-
; RV32: # %bb.0: # %entry
124-
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu
125-
; RV32-NEXT: vmsgt.vi v0, v8, 0
126-
; RV32-NEXT: vsra.vi v8, v8, 31
127-
; RV32-NEXT: vor.vi v8, v8, 1, v0.t
128-
; RV32-NEXT: ret
129-
;
130-
; RV64-LABEL: scmp_i32z32:
131-
; RV64: # %bb.0: # %entry
132-
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
133-
; RV64-NEXT: vmsgt.vi v0, v8, 0
134-
; RV64-NEXT: vmv.v.i v10, 0
135-
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
136-
; RV64-NEXT: vmsle.vi v0, v8, -1
137-
; RV64-NEXT: vmerge.vim v8, v10, -1, v0
138-
; RV64-NEXT: ret
123+
; CHECK-LABEL: scmp_i32z32:
124+
; CHECK: # %bb.0: # %entry
125+
; CHECK-NEXT: li a0, 1
126+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
127+
; CHECK-NEXT: vmin.vx v10, v8, a0
128+
; CHECK-NEXT: vsra.vi v8, v8, 31
129+
; CHECK-NEXT: vor.vv v8, v8, v10
130+
; CHECK-NEXT: ret
139131
entry:
140132
%c = call <vscale x 4 x i32> @llvm.scmp(<vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer)
141133
ret <vscale x 4 x i32> %c
@@ -171,10 +163,11 @@ define <vscale x 2 x i64> @scmp_z64i64(<vscale x 2 x i64> %a) {
171163
; RV64-LABEL: scmp_z64i64:
172164
; RV64: # %bb.0: # %entry
173165
; RV64-NEXT: li a0, 63
174-
; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
166+
; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu
175167
; RV64-NEXT: vmsgt.vi v0, v8, 0
176-
; RV64-NEXT: vsrl.vx v8, v8, a0
177-
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
168+
; RV64-NEXT: vmv.v.i v10, -1
169+
; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
170+
; RV64-NEXT: vmv.v.v v8, v10
178171
; RV64-NEXT: ret
179172
entry:
180173
%c = call <vscale x 2 x i64> @llvm.scmp(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %a)
@@ -197,8 +190,9 @@ define <vscale x 2 x i64> @scmp_i64z64(<vscale x 2 x i64> %a) {
197190
; RV64-NEXT: li a0, 63
198191
; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu
199192
; RV64-NEXT: vmsgt.vi v0, v8, 0
200-
; RV64-NEXT: vsra.vx v8, v8, a0
201-
; RV64-NEXT: vor.vi v8, v8, 1, v0.t
193+
; RV64-NEXT: vmv.v.i v10, 1
194+
; RV64-NEXT: vsra.vx v10, v8, a0, v0.t
195+
; RV64-NEXT: vmv.v.v v8, v10
202196
; RV64-NEXT: ret
203197
entry:
204198
%c = call <vscale x 2 x i64> @llvm.scmp(<vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer)

0 commit comments

Comments
 (0)