-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[LoongArch] Optimize insertelement containing variable index using compare+select #151131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) ChangesFull diff: https://github.com/llvm/llvm-project/pull/151131.diff 3 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d96136cddb3b0..6f01c7bd06111 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2621,8 +2621,40 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
- if (isa<ConstantSDNode>(Op->getOperand(2)))
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ if (isa<ConstantSDNode>(Op2)) {
return Op;
+ } else {
+ MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
+ MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
+
+ if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
+ return SDValue();
+
+ SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
+ SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
+
+ SmallVector<SDValue, 32> RawIndices;
+ for (unsigned i = 0; i < NumElts; ++i)
+ RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+
+ // insert vec, elt, idx
+ // =>
+ // select (splatidx == {0,1,2...}) ? splatelt : vec
+ SDValue SelectCC =
+ DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index c1d4220fc1166..e5a85245710de 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -114,22 +114,15 @@ define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_32xi8_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
-; CHECK-NEXT: st.b $a2, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT: xvseq.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
@@ -140,22 +133,15 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
-; CHECK-NEXT: st.h $a2, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
+; CHECK-NEXT: xvseq.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
@@ -166,22 +152,15 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi32_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
-; CHECK-NEXT: st.w $a2, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
+; CHECK-NEXT: xvseq.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a2
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
@@ -192,22 +171,15 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi64_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
-; CHECK-NEXT: st.d $a2, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI11_0)
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: xvreplgr2vr.d $xr2, $a0
+; CHECK-NEXT: xvseq.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
@@ -218,22 +190,16 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xfloat_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: xvst $xr1, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
-; CHECK-NEXT: fst.s $fa0, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI12_0)
+; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI12_0)
+; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr3, $a0
+; CHECK-NEXT: xvseq.w $xr1, $xr3, $xr1
+; CHECK-NEXT: xvreplve0.w $xr0, $xr0
+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%v_new = insertelement <8 x float> %v, float %in, i32 %idx
@@ -244,22 +210,16 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xdouble_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: xvst $xr1, $sp, 32
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
-; CHECK-NEXT: fst.d $fa0, $a0, 0
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI13_0)
+; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI13_0)
+; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
+; CHECK-NEXT: xvseq.d $xr1, $xr3, $xr1
+; CHECK-NEXT: xvreplve0.d $xr0, $xr0
+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%v_new = insertelement <4 x double> %v, double %in, i32 %idx
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
index c73252bd1335f..4bb1941724dc6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
@@ -84,15 +84,15 @@ define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi8_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
-; CHECK-NEXT: st.b $a2, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT: vld $vr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: vreplgr2vr.b $vr2, $a0
+; CHECK-NEXT: vseq.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vreplgr2vr.b $vr2, $a2
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx
@@ -103,15 +103,15 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
-; CHECK-NEXT: st.h $a2, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT: vld $vr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
+; CHECK-NEXT: vseq.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vreplgr2vr.h $vr2, $a2
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx
@@ -122,15 +122,15 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi32_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
-; CHECK-NEXT: st.w $a2, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT: vld $vr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
+; CHECK-NEXT: vseq.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vreplgr2vr.w $vr2, $a2
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx
@@ -141,15 +141,15 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_2xi64_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
-; CHECK-NEXT: st.d $a2, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT: vld $vr1, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: vreplgr2vr.d $vr2, $a0
+; CHECK-NEXT: vseq.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vreplgr2vr.d $vr2, $a2
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx
@@ -160,15 +160,16 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xfloat_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: vst $vr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
-; CHECK-NEXT: fst.s $fa0, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: vreplgr2vr.w $vr3, $a0
+; CHECK-NEXT: vseq.w $vr1, $vr3, $vr1
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
%v_new = insertelement <4 x float> %v, float %ins, i32 %idx
@@ -179,15 +180,16 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_2xdouble_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: vst $vr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
-; CHECK-NEXT: fst.d $fa0, $a0, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
+; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
+; CHECK-NEXT: vseq.d $vr1, $vr3, $vr1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
%v_new = insertelement <2 x double> %v, double %ins, i32 %idx
|
According to https://llvm.org/docs/LangRef.html#insertelement-instruction: Before this commit, seems that element in Anyway, both are right? |
|
||
SmallVector<SDValue, 32> RawIndices; | ||
for (unsigned i = 0; i < NumElts; ++i) | ||
RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT())); | |
RawIndices.push_back(DAG.getConstant(i, DL, IdxTy)); |
I'm not sure...
/// Return an ISD::BUILD_VECTOR node. The number of elements in VT,
/// which must be a vector type, must match the number of operands in Ops.
/// The operands must have the same type as (or, for integers, a type wider
/// than) VT's element type.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef<SDValue> Ops) {
// VerifySDNode (via InsertNode) checks BUILD_VECTOR later.
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getBuildVector()
allows "The operands must have the same type as (or, for integers, a type wider than) VT's element type.
". And we must ensure the elements of BUILD_VECTOR
we built are type legal (ie. i64
. i8
/i16
/i32
are illegal for loongarch64) at this point.
When I tried to use IdxTy
here, assertion occurs:
Legalizing: t38: v16i8 = BUILD_VECTOR Constant:i8<0>, Constant:i8<1>, ... , Constant:i8<15>
llc: /.../llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:991: void (anonymous namespace)::SelectionDAGLegalize::LegalizeOp(SDNode *): Assertion `(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"' failed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about loongarch32 if we use Subtarget.getGRLenVT()
when the vector is v{2,4}i64 or v{2,4}f64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
v2i64/v4i64
is okay, because of the split of i64
in the initial selectiondag.
v2f64/v4f64
occur an assertion error when verifyNode()
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. Other tests may also have issues with loongarch32 + lsx/lasx
. I think @heiher will handle them later.
No description provided.