diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4fef93cc5aec5..836a5819beba0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -24135,9 +24135,6 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { - // Limit to post-legalization in order to avoid peeling truncating stores. - if (DCI.isBeforeLegalize()) - return SDValue(); if (!Subtarget->isNeonAvailable()) return SDValue(); // Source operand is already a vector. @@ -24174,6 +24171,13 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST, SDValue VecFP = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, FPSrc); SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP); + if (ST->isTruncatingStore()) { + EVT NewVecDstVT = EVT::getVectorVT( + *DAG.getContext(), ST->getMemoryVT(), + VecDstVT.getFixedSizeInBits() / ST->getMemoryVT().getFixedSizeInBits()); + VecConv = DAG.getNode(AArch64ISD::NVCAST, DL, NewVecDstVT, VecConv); + } + SDValue Zero = DAG.getVectorIdxConstant(0, DL); SDValue Extracted = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecConv, Zero); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 251fd44b6ea31..a62de87b072e9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -9273,8 +9273,12 @@ multiclass St1LanePost128Pat; +defm : St1LanePost128Pat; defm : St1LanePost128Pat; +defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; diff --git a/llvm/test/CodeGen/AArch64/store-float-conversion.ll b/llvm/test/CodeGen/AArch64/store-float-conversion.ll index c46801fc16714..1d4073f673edb 100644 --- a/llvm/test/CodeGen/AArch64/store-float-conversion.ll +++ b/llvm/test/CodeGen/AArch64/store-float-conversion.ll @@ -27,6 +27,20 @@ entry: ret void } +define ptr @f32_to_s8_inc(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s8_inc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: st1 { v0.b }[0], [x0], #1 +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %trunc = trunc i32 %conv to i8 + %next = getelementptr i8, ptr %dst, i64 1 + store i8 %trunc, ptr %dst + ret ptr %next +} + define void @f32_to_u16(float %f, ptr %dst) { ; CHECK-LABEL: f32_to_u16: ; CHECK: // %bb.0: // %entry @@ -53,6 +67,20 @@ entry: ret void } +define ptr @f32_to_s16_inc(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s16_inc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: st1 { v0.h }[0], [x0], #2 +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %trunc = trunc i32 %conv to i16 + %next = getelementptr i16, ptr %dst, i64 1 + store i16 %trunc, ptr %dst + ret ptr %next +} + define void @f32_to_u32(float %f, ptr %dst) { ; CHECK-LABEL: f32_to_u32: ; CHECK: // %bb.0: // %entry @@ -77,6 +105,19 @@ entry: ret void } +define ptr @f32_to_s32_inc(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s32_inc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: st1 { v0.s }[0], [x0], #4 +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %next = getelementptr i32, ptr %dst, i64 1 + store i32 %conv, ptr %dst + ret ptr %next +} + define void @f32_to_s64(float %f, ptr %dst) { ; CHECK-LABEL: f32_to_s64: ; CHECK: // %bb.0: // %entry @@ -115,6 +156,93 @@ entry: ret void } +define ptr @f64_to_s64_inc(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s64_inc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: st1 { v0.d }[0], [x0], #8 +; CHECK-NEXT: ret +entry: + %conv = fptosi double %d to i64 + %next = getelementptr i64, ptr %dst, i64 1 + store i64 %conv, ptr %dst + ret ptr %next +} + +define void @f64_to_u8(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: str b0, [x0] +; CHECK-NEXT: ret + %conv = fptoui double %d to i64 + %trunc = trunc i64 %conv to i8 + store i8 %trunc, ptr %dst + ret void +} + +define void @f64_to_s8(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: str b0, [x0] +; CHECK-NEXT: ret + %conv = fptosi double %d to i64 + %trunc = trunc i64 %conv to i8 + store i8 %trunc, ptr %dst + ret void +} + +define ptr @f64_to_s8_inc(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s8_inc: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: st1 { v0.b }[0], [x0], #1 +; CHECK-NEXT: ret + %conv = fptosi double %d to i64 + %trunc = trunc i64 %conv to i8 + store i8 %trunc, ptr %dst + %next = getelementptr i8, ptr %dst, i64 1 + ret ptr %next +} + +define void @f64_to_u16(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + %conv = fptoui double %d to i64 + %trunc = trunc i64 %conv to i16 + store i16 %trunc, ptr %dst + ret void +} + +define void @f64_to_s16(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + %conv = fptosi double %d to i64 + %trunc = trunc i64 %conv to i16 + store i16 %trunc, ptr %dst + ret void +} + +define ptr @f64_to_s16_inc(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s16_inc: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: st1 { v0.h }[0], [x0], #2 +; CHECK-NEXT: ret + %conv = fptosi double %d to i64 + %trunc = trunc i64 %conv to i16 + %next = getelementptr i16, ptr %dst, i64 1 + store i16 %trunc, ptr %dst + ret ptr %next +} + define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) { ; CHECK-LABEL: f32_to_i32_multiple_uses: ; CHECK: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index 5fc996ad921ff..223698ba225a8 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -64,8 +64,7 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcsel s2, s0, s3, mi ; CHECK-NEXT: subs w10, w10, #1 ; CHECK-NEXT: fcvtzs s2, s2 -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: strb w11, [x9], #1 +; CHECK-NEXT: st1 { v2.b }[0], [x9], #1 ; CHECK-NEXT: b.ne .LBB0_7 ; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup ; CHECK-NEXT: ret