Skip to content

Commit ece7a72

Browse files
authored
[LoongArch] Optimize insertelement containing variable index using compare+select (#151131)
1 parent 16d5db7 commit ece7a72

File tree

3 files changed

+131
-140
lines changed

3 files changed

+131
-140
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2621,9 +2621,38 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
26212621
// Custom lowering hook for INSERT_VECTOR_ELT nodes.
//
// NOTE: this is a rendered diff. A bare number followed by '-' marks the next
// line as removed by the commit, and a number followed by '+' marks the next
// line as added; doubled numbers are unchanged context lines.
//
// Behaviour of the added code:
//  * Constant index (operand 2 is a ConstantSDNode): Op is returned unchanged.
//  * Variable index: the insert is rewritten as a lane-wise compare of the
//    splatted index against {0, 1, ..., NumElts-1}, followed by a VSELECT
//    between the splatted new element and the original vector.
//  * If VT or the matching integer vector type is not legal, an empty SDValue
//    is returned.
SDValue
26222622
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
26232623
SelectionDAG &DAG) const {
2624-
if (isa<ConstantSDNode>(Op->getOperand(2)))
2624+
MVT VT = Op.getSimpleValueType();
2625+
MVT EltVT = VT.getVectorElementType();
2626+
unsigned NumElts = VT.getVectorNumElements();
2627+
unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2628+
SDLoc DL(Op);
// Operands: Op0 = source vector, Op1 = element to insert, Op2 = lane index.
2629+
SDValue Op0 = Op.getOperand(0);
2630+
SDValue Op1 = Op.getOperand(1);
2631+
SDValue Op2 = Op.getOperand(2);
2632+
2633+
if (isa<ConstantSDNode>(Op2))
26252634
return Op;
2626-
return SDValue();
2635+
// Integer vector type with the same lane width and lane count as VT. It is
// used for the splatted index, the constant lane numbers, and the compare
// result that feeds the VSELECT.
2636+
MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2637+
MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2638+
// Give up (empty SDValue) unless both vector types are legal.
// NOTE(review): presumably the caller then falls back to the generic
// expansion -- confirm against the legalizer.
2639+
if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2640+
return SDValue();
2641+
// Broadcast the new element and the runtime index to every lane.
2642+
SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2643+
SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2644+
// Constant lane numbers 0 .. NumElts-1. Each constant is created with the
// subtarget's GRLen type and then collected into an IdxVTy build vector.
2645+
SmallVector<SDValue, 32> RawIndices;
2646+
for (unsigned i = 0; i < NumElts; ++i)
2647+
RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2648+
SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2649+
2650+
// insert vec, elt, idx
2651+
// =>
2652+
// select (splatidx == {0,1,2...}) ? splatelt : vec
// Lanes where the splatted index equals the lane number take SplatElt;
// every other lane keeps the value from Op0.
2653+
SDValue SelectCC =
2654+
DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2655+
return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
26272656
}
26282657

26292658
SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll

Lines changed: 50 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -114,22 +114,15 @@ define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
114114
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
115115
; CHECK-LABEL: insert_32xi8_idx:
116116
; CHECK: # %bb.0:
117-
; CHECK-NEXT: addi.d $sp, $sp, -96
118-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
119-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
120-
; CHECK-NEXT: addi.d $fp, $sp, 96
121-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
122-
; CHECK-NEXT: xvld $xr0, $a0, 0
123-
; CHECK-NEXT: xvst $xr0, $sp, 32
124-
; CHECK-NEXT: addi.d $a0, $sp, 32
125-
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
126-
; CHECK-NEXT: st.b $a2, $a0, 0
127-
; CHECK-NEXT: xvld $xr0, $sp, 32
117+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
118+
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI8_0)
119+
; CHECK-NEXT: xvld $xr1, $a0, 0
120+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
121+
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
122+
; CHECK-NEXT: xvseq.b $xr0, $xr2, $xr0
123+
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
124+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
128125
; CHECK-NEXT: xvst $xr0, $a1, 0
129-
; CHECK-NEXT: addi.d $sp, $fp, -96
130-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
131-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
132-
; CHECK-NEXT: addi.d $sp, $sp, 96
133126
; CHECK-NEXT: ret
134127
%v = load volatile <32 x i8>, ptr %src
135128
%v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
@@ -140,22 +133,15 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
140133
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
141134
; CHECK-LABEL: insert_16xi16_idx:
142135
; CHECK: # %bb.0:
143-
; CHECK-NEXT: addi.d $sp, $sp, -96
144-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
145-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
146-
; CHECK-NEXT: addi.d $fp, $sp, 96
147-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
148-
; CHECK-NEXT: xvld $xr0, $a0, 0
149-
; CHECK-NEXT: xvst $xr0, $sp, 32
150-
; CHECK-NEXT: addi.d $a0, $sp, 32
151-
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
152-
; CHECK-NEXT: st.h $a2, $a0, 0
153-
; CHECK-NEXT: xvld $xr0, $sp, 32
136+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
137+
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI9_0)
138+
; CHECK-NEXT: xvld $xr1, $a0, 0
139+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
140+
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
141+
; CHECK-NEXT: xvseq.h $xr0, $xr2, $xr0
142+
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
143+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
154144
; CHECK-NEXT: xvst $xr0, $a1, 0
155-
; CHECK-NEXT: addi.d $sp, $fp, -96
156-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
157-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
158-
; CHECK-NEXT: addi.d $sp, $sp, 96
159145
; CHECK-NEXT: ret
160146
%v = load volatile <16 x i16>, ptr %src
161147
%v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
@@ -166,22 +152,15 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
166152
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
167153
; CHECK-LABEL: insert_8xi32_idx:
168154
; CHECK: # %bb.0:
169-
; CHECK-NEXT: addi.d $sp, $sp, -96
170-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
171-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
172-
; CHECK-NEXT: addi.d $fp, $sp, 96
173-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
174-
; CHECK-NEXT: xvld $xr0, $a0, 0
175-
; CHECK-NEXT: xvst $xr0, $sp, 32
176-
; CHECK-NEXT: addi.d $a0, $sp, 32
177-
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
178-
; CHECK-NEXT: st.w $a2, $a0, 0
179-
; CHECK-NEXT: xvld $xr0, $sp, 32
155+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI10_0)
156+
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI10_0)
157+
; CHECK-NEXT: xvld $xr1, $a0, 0
158+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
159+
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
160+
; CHECK-NEXT: xvseq.w $xr0, $xr2, $xr0
161+
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a2
162+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
180163
; CHECK-NEXT: xvst $xr0, $a1, 0
181-
; CHECK-NEXT: addi.d $sp, $fp, -96
182-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
183-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
184-
; CHECK-NEXT: addi.d $sp, $sp, 96
185164
; CHECK-NEXT: ret
186165
%v = load volatile <8 x i32>, ptr %src
187166
%v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
@@ -192,22 +171,15 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
192171
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
193172
; CHECK-LABEL: insert_4xi64_idx:
194173
; CHECK: # %bb.0:
195-
; CHECK-NEXT: addi.d $sp, $sp, -96
196-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
197-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
198-
; CHECK-NEXT: addi.d $fp, $sp, 96
199-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
200-
; CHECK-NEXT: xvld $xr0, $a0, 0
201-
; CHECK-NEXT: xvst $xr0, $sp, 32
202-
; CHECK-NEXT: addi.d $a0, $sp, 32
203-
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
204-
; CHECK-NEXT: st.d $a2, $a0, 0
205-
; CHECK-NEXT: xvld $xr0, $sp, 32
174+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI11_0)
175+
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI11_0)
176+
; CHECK-NEXT: xvld $xr1, $a0, 0
177+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
178+
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a0
179+
; CHECK-NEXT: xvseq.d $xr0, $xr2, $xr0
180+
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
181+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
206182
; CHECK-NEXT: xvst $xr0, $a1, 0
207-
; CHECK-NEXT: addi.d $sp, $fp, -96
208-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
209-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
210-
; CHECK-NEXT: addi.d $sp, $sp, 96
211183
; CHECK-NEXT: ret
212184
%v = load volatile <4 x i64>, ptr %src
213185
%v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
@@ -218,22 +190,16 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
218190
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
219191
; CHECK-LABEL: insert_8xfloat_idx:
220192
; CHECK: # %bb.0:
221-
; CHECK-NEXT: addi.d $sp, $sp, -96
222-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
223-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
224-
; CHECK-NEXT: addi.d $fp, $sp, 96
225-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
226-
; CHECK-NEXT: xvld $xr1, $a0, 0
227-
; CHECK-NEXT: xvst $xr1, $sp, 32
228-
; CHECK-NEXT: addi.d $a0, $sp, 32
229-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
230-
; CHECK-NEXT: fst.s $fa0, $a0, 0
231-
; CHECK-NEXT: xvld $xr0, $sp, 32
193+
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
194+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI12_0)
195+
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI12_0)
196+
; CHECK-NEXT: xvld $xr2, $a0, 0
197+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
198+
; CHECK-NEXT: xvreplgr2vr.w $xr3, $a0
199+
; CHECK-NEXT: xvseq.w $xr1, $xr3, $xr1
200+
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
201+
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
232202
; CHECK-NEXT: xvst $xr0, $a1, 0
233-
; CHECK-NEXT: addi.d $sp, $fp, -96
234-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
235-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
236-
; CHECK-NEXT: addi.d $sp, $sp, 96
237203
; CHECK-NEXT: ret
238204
%v = load volatile <8 x float>, ptr %src
239205
%v_new = insertelement <8 x float> %v, float %in, i32 %idx
@@ -244,22 +210,16 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
244210
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
245211
; CHECK-LABEL: insert_4xdouble_idx:
246212
; CHECK: # %bb.0:
247-
; CHECK-NEXT: addi.d $sp, $sp, -96
248-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
249-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
250-
; CHECK-NEXT: addi.d $fp, $sp, 96
251-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
252-
; CHECK-NEXT: xvld $xr1, $a0, 0
253-
; CHECK-NEXT: xvst $xr1, $sp, 32
254-
; CHECK-NEXT: addi.d $a0, $sp, 32
255-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
256-
; CHECK-NEXT: fst.d $fa0, $a0, 0
257-
; CHECK-NEXT: xvld $xr0, $sp, 32
213+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
214+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI13_0)
215+
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI13_0)
216+
; CHECK-NEXT: xvld $xr2, $a0, 0
217+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
218+
; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
219+
; CHECK-NEXT: xvseq.d $xr1, $xr3, $xr1
220+
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
221+
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
258222
; CHECK-NEXT: xvst $xr0, $a1, 0
259-
; CHECK-NEXT: addi.d $sp, $fp, -96
260-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
261-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
262-
; CHECK-NEXT: addi.d $sp, $sp, 96
263223
; CHECK-NEXT: ret
264224
%v = load volatile <4 x double>, ptr %src
265225
%v_new = insertelement <4 x double> %v, double %in, i32 %idx

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll

Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
8484
define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
8585
; CHECK-LABEL: insert_16xi8_idx:
8686
; CHECK: # %bb.0:
87-
; CHECK-NEXT: addi.d $sp, $sp, -16
88-
; CHECK-NEXT: vld $vr0, $a0, 0
89-
; CHECK-NEXT: vst $vr0, $sp, 0
90-
; CHECK-NEXT: addi.d $a0, $sp, 0
91-
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
92-
; CHECK-NEXT: st.b $a2, $a0, 0
93-
; CHECK-NEXT: vld $vr0, $sp, 0
87+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
88+
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
89+
; CHECK-NEXT: vld $vr1, $a0, 0
90+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
91+
; CHECK-NEXT: vreplgr2vr.b $vr2, $a0
92+
; CHECK-NEXT: vseq.b $vr0, $vr2, $vr0
93+
; CHECK-NEXT: vreplgr2vr.b $vr2, $a2
94+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
9495
; CHECK-NEXT: vst $vr0, $a1, 0
95-
; CHECK-NEXT: addi.d $sp, $sp, 16
9696
; CHECK-NEXT: ret
9797
%v = load volatile <16 x i8>, ptr %src
9898
%v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx
@@ -103,15 +103,15 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
103103
define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
104104
; CHECK-LABEL: insert_8xi16_idx:
105105
; CHECK: # %bb.0:
106-
; CHECK-NEXT: addi.d $sp, $sp, -16
107-
; CHECK-NEXT: vld $vr0, $a0, 0
108-
; CHECK-NEXT: vst $vr0, $sp, 0
109-
; CHECK-NEXT: addi.d $a0, $sp, 0
110-
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
111-
; CHECK-NEXT: st.h $a2, $a0, 0
112-
; CHECK-NEXT: vld $vr0, $sp, 0
106+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
107+
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
108+
; CHECK-NEXT: vld $vr1, $a0, 0
109+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
110+
; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
111+
; CHECK-NEXT: vseq.h $vr0, $vr2, $vr0
112+
; CHECK-NEXT: vreplgr2vr.h $vr2, $a2
113+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
113114
; CHECK-NEXT: vst $vr0, $a1, 0
114-
; CHECK-NEXT: addi.d $sp, $sp, 16
115115
; CHECK-NEXT: ret
116116
%v = load volatile <8 x i16>, ptr %src
117117
%v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx
@@ -122,15 +122,15 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
122122
define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
123123
; CHECK-LABEL: insert_4xi32_idx:
124124
; CHECK: # %bb.0:
125-
; CHECK-NEXT: addi.d $sp, $sp, -16
126-
; CHECK-NEXT: vld $vr0, $a0, 0
127-
; CHECK-NEXT: vst $vr0, $sp, 0
128-
; CHECK-NEXT: addi.d $a0, $sp, 0
129-
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
130-
; CHECK-NEXT: st.w $a2, $a0, 0
131-
; CHECK-NEXT: vld $vr0, $sp, 0
125+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
126+
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
127+
; CHECK-NEXT: vld $vr1, $a0, 0
128+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
129+
; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
130+
; CHECK-NEXT: vseq.w $vr0, $vr2, $vr0
131+
; CHECK-NEXT: vreplgr2vr.w $vr2, $a2
132+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
132133
; CHECK-NEXT: vst $vr0, $a1, 0
133-
; CHECK-NEXT: addi.d $sp, $sp, 16
134134
; CHECK-NEXT: ret
135135
%v = load volatile <4 x i32>, ptr %src
136136
%v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx
@@ -141,15 +141,15 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
141141
define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
142142
; CHECK-LABEL: insert_2xi64_idx:
143143
; CHECK: # %bb.0:
144-
; CHECK-NEXT: addi.d $sp, $sp, -16
145-
; CHECK-NEXT: vld $vr0, $a0, 0
146-
; CHECK-NEXT: vst $vr0, $sp, 0
147-
; CHECK-NEXT: addi.d $a0, $sp, 0
148-
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
149-
; CHECK-NEXT: st.d $a2, $a0, 0
150-
; CHECK-NEXT: vld $vr0, $sp, 0
144+
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
145+
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
146+
; CHECK-NEXT: vld $vr1, $a0, 0
147+
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
148+
; CHECK-NEXT: vreplgr2vr.d $vr2, $a0
149+
; CHECK-NEXT: vseq.d $vr0, $vr2, $vr0
150+
; CHECK-NEXT: vreplgr2vr.d $vr2, $a2
151+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
151152
; CHECK-NEXT: vst $vr0, $a1, 0
152-
; CHECK-NEXT: addi.d $sp, $sp, 16
153153
; CHECK-NEXT: ret
154154
%v = load volatile <2 x i64>, ptr %src
155155
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx
@@ -160,15 +160,16 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
160160
define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind {
161161
; CHECK-LABEL: insert_4xfloat_idx:
162162
; CHECK: # %bb.0:
163-
; CHECK-NEXT: addi.d $sp, $sp, -16
164-
; CHECK-NEXT: vld $vr1, $a0, 0
165-
; CHECK-NEXT: vst $vr1, $sp, 0
166-
; CHECK-NEXT: addi.d $a0, $sp, 0
167-
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
168-
; CHECK-NEXT: fst.s $fa0, $a0, 0
169-
; CHECK-NEXT: vld $vr0, $sp, 0
163+
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
164+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
165+
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
166+
; CHECK-NEXT: vld $vr2, $a0, 0
167+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
168+
; CHECK-NEXT: vreplgr2vr.w $vr3, $a0
169+
; CHECK-NEXT: vseq.w $vr1, $vr3, $vr1
170+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
171+
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
170172
; CHECK-NEXT: vst $vr0, $a1, 0
171-
; CHECK-NEXT: addi.d $sp, $sp, 16
172173
; CHECK-NEXT: ret
173174
%v = load volatile <4 x float>, ptr %src
174175
%v_new = insertelement <4 x float> %v, float %ins, i32 %idx
@@ -179,15 +180,16 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
179180
define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind {
180181
; CHECK-LABEL: insert_2xdouble_idx:
181182
; CHECK: # %bb.0:
182-
; CHECK-NEXT: addi.d $sp, $sp, -16
183-
; CHECK-NEXT: vld $vr1, $a0, 0
184-
; CHECK-NEXT: vst $vr1, $sp, 0
185-
; CHECK-NEXT: addi.d $a0, $sp, 0
186-
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
187-
; CHECK-NEXT: fst.d $fa0, $a0, 0
188-
; CHECK-NEXT: vld $vr0, $sp, 0
183+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
184+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
185+
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
186+
; CHECK-NEXT: vld $vr2, $a0, 0
187+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
188+
; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
189+
; CHECK-NEXT: vseq.d $vr1, $vr3, $vr1
190+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
191+
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
189192
; CHECK-NEXT: vst $vr0, $a1, 0
190-
; CHECK-NEXT: addi.d $sp, $sp, 16
191193
; CHECK-NEXT: ret
192194
%v = load volatile <2 x double>, ptr %src
193195
%v_new = insertelement <2 x double> %v, double %ins, i32 %idx

0 commit comments

Comments
 (0)