Skip to content

Commit 644129f

Browse files
committed
[LoongArch] Optimize extractelement containing variable index
1 parent 3466cdb commit 644129f

File tree

4 files changed

+37
-29
lines changed

4 files changed

+37
-29
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2608,13 +2608,29 @@ SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
26082608
SDValue
26092609
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
26102610
SelectionDAG &DAG) const {
2611-
EVT VecTy = Op->getOperand(0)->getValueType(0);
2611+
MVT EltVT = Op.getSimpleValueType();
2612+
SDValue Vec = Op->getOperand(0);
2613+
EVT VecTy = Vec->getValueType(0);
26122614
SDValue Idx = Op->getOperand(1);
26132615
unsigned NumElts = VecTy.getVectorNumElements();
2616+
SDLoc DL(Op);
2617+
2618+
assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
26142619

26152620
if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
26162621
return Op;
26172622

2623+
// TODO: Deal with other legal 256-bit vector types?
2624+
if (!isa<ConstantSDNode>(Idx) &&
2625+
(VecTy == MVT::v8i32 || VecTy == MVT::v8f32)) {
2626+
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2627+
SDValue SplatValue =
2628+
DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2629+
2630+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2631+
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2632+
}
2633+
26182634
return SDValue();
26192635
}
26202636

@@ -6632,6 +6648,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
66326648
NODE_NAME_CASE(VREPLVEI)
66336649
NODE_NAME_CASE(VREPLGR2VR)
66346650
NODE_NAME_CASE(XVPERMI)
6651+
NODE_NAME_CASE(XVPERM)
66356652
NODE_NAME_CASE(VPICK_SEXT_ELT)
66366653
NODE_NAME_CASE(VPICK_ZEXT_ELT)
66376654
NODE_NAME_CASE(VREPLVE)

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,7 @@ enum NodeType : unsigned {
141141
VREPLVEI,
142142
VREPLGR2VR,
143143
XVPERMI,
144+
XVPERM,
144145

145146
// Extended vector element extraction
146147
VPICK_SEXT_ELT,

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,12 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
def SDT_LoongArchXVPERM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
14+
SDTCisVec<2>, SDTCisInt<2>]>;
15+
1316
// Target nodes.
1417
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
18+
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
1519
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
1620
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
1721
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1835,6 +1839,12 @@ def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8),
18351839
def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8),
18361840
(XVPERMI_D v4f64:$xj, immZExt8: $ui8)>;
18371841

1842+
// XVPERM_W
1843+
def : Pat<(loongarch_xvperm v8i32:$xj, v8i32:$xk),
1844+
(XVPERM_W v8i32:$xj, v8i32:$xk)>;
1845+
def : Pat<(loongarch_xvperm v8f32:$xj, v8i32:$xk),
1846+
(XVPERM_W v8f32:$xj, v8i32:$xk)>;
1847+
18381848
// XVREPLVE0_{W/D}
18391849
def : Pat<(lasxsplatf32 FPR32:$fj),
18401850
(XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll

Lines changed: 8 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -126,21 +126,11 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
126126
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
127127
; CHECK-LABEL: extract_8xi32_idx:
128128
; CHECK: # %bb.0:
129-
; CHECK-NEXT: addi.d $sp, $sp, -96
130-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
131-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
132-
; CHECK-NEXT: addi.d $fp, $sp, 96
133-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
134129
; CHECK-NEXT: xvld $xr0, $a0, 0
135-
; CHECK-NEXT: xvst $xr0, $sp, 32
136-
; CHECK-NEXT: addi.d $a0, $sp, 32
137-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
138-
; CHECK-NEXT: ld.w $a0, $a0, 0
139-
; CHECK-NEXT: st.w $a0, $a1, 0
140-
; CHECK-NEXT: addi.d $sp, $fp, -96
141-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
142-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
143-
; CHECK-NEXT: addi.d $sp, $sp, 96
130+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
131+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
132+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
133+
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
144134
; CHECK-NEXT: ret
145135
%v = load volatile <8 x i32>, ptr %src
146136
%e = extractelement <8 x i32> %v, i32 %idx
@@ -176,21 +166,11 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
176166
define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
177167
; CHECK-LABEL: extract_8xfloat_idx:
178168
; CHECK: # %bb.0:
179-
; CHECK-NEXT: addi.d $sp, $sp, -96
180-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
181-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
182-
; CHECK-NEXT: addi.d $fp, $sp, 96
183-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
184169
; CHECK-NEXT: xvld $xr0, $a0, 0
185-
; CHECK-NEXT: xvst $xr0, $sp, 32
186-
; CHECK-NEXT: addi.d $a0, $sp, 32
187-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
188-
; CHECK-NEXT: fld.s $fa0, $a0, 0
189-
; CHECK-NEXT: fst.s $fa0, $a1, 0
190-
; CHECK-NEXT: addi.d $sp, $fp, -96
191-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
192-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
193-
; CHECK-NEXT: addi.d $sp, $sp, 96
170+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
171+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
172+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
173+
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
194174
; CHECK-NEXT: ret
195175
%v = load volatile <8 x float>, ptr %src
196176
%e = extractelement <8 x float> %v, i32 %idx

0 commit comments

Comments
 (0)