diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbdc1b6031680..52729e9e7cee7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -518,10 +518,12 @@ class LLVM_ABI TargetLoweringBase {
     return true;
   }
 
-  /// Return true if multiple condition registers are available.
-  bool hasMultipleConditionRegisters() const {
-    return HasMultipleConditionRegisters;
-  }
+  /// Does the target have multiple (allocatable) condition registers that
+  /// can be used to store the results of comparisons for use by selects
+  /// and conditional branches. With multiple condition registers, the code
+  /// generator will not aggressively sink comparisons into the blocks of their
+  /// users.
+  virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
 
   /// Return true if the target has BitExtract instructions.
   bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2453,7 +2455,7 @@ class LLVM_ABI TargetLoweringBase {
                                                EVT VT) const {
     // If a target has multiple condition registers, then it likely has logical
     // operations on those registers.
-    if (hasMultipleConditionRegisters())
+    if (hasMultipleConditionRegisters(VT))
       return false;
     // Only do the transform if the value won't be split into multiple
     // registers.
@@ -2560,15 +2562,6 @@ class LLVM_ABI TargetLoweringBase {
     StackPointerRegisterToSaveRestore = R;
   }
 
-  /// Tells the code generator that the target has multiple (allocatable)
-  /// condition registers that can be used to store the results of comparisons
-  /// for use by selects and conditional branches. With multiple condition
-  /// registers, the code generator will not aggressively sink comparisons into
-  /// the blocks of their users.
-  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
-    HasMultipleConditionRegisters = hasManyRegs;
-  }
-
   /// Tells the code generator that the target has BitExtract instructions.
   /// The code generator will aggressively sink "shift"s into the blocks of
   /// their users if the users will generate "and" instructions which can be
@@ -3604,13 +3597,6 @@ class LLVM_ABI TargetLoweringBase {
 private:
   const TargetMachine &TM;
 
-  /// Tells the code generator that the target has multiple (allocatable)
-  /// condition registers that can be used to store the results of comparisons
-  /// for use by selects and conditional branches. With multiple condition
-  /// registers, the code generator will not aggressively sink comparisons into
-  /// the blocks of their users.
-  bool HasMultipleConditionRegisters;
-
   /// Tells the code generator that the target has BitExtract instructions.
   /// The code generator will aggressively sink "shift"s into the blocks of
   /// their users if the users will generate "and" instructions which can be
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 416c56d5a36f8..7e0c45e92b365 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
 ///
 /// Return true if any changes are made.
 static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
-  if (TLI.hasMultipleConditionRegisters())
+  if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
     return false;
 
   // Avoid sinking soft-FP comparisons, since this can move them into a loop.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3c91b0eb4e2ea..9b22033c7f4ac 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
   MaxGluedStoresPerMemcpy = 0;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
       MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
-  HasMultipleConditionRegisters = false;
   HasExtractBitsInsn = false;
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ea63edd86210e..88876570ac811 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -887,6 +887,10 @@ class AArch64TargetLowering : public TargetLowering {
   bool shouldScalarizeBinop(SDValue VecOp) const override {
     return VecOp.getOpcode() == ISD::SETCC;
   }
+
+  bool hasMultipleConditionRegisters(EVT VT) const override {
+    return VT.isScalableVector();
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 40f49dade6131..1afc777d2b3b5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -6225,10 +6225,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
   }
 
-  auto ShouldSinkCondition = [](Value *Cond) -> bool {
+  auto ShouldSinkCondition = [](Value *Cond,
+                                SmallVectorImpl<Use *> &Ops) -> bool {
+    if (!isa<IntrinsicInst>(Cond))
+      return false;
     auto *II = dyn_cast<IntrinsicInst>(Cond);
-    return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
-           isa<ScalableVectorType>(II->getOperand(0)->getType());
+    if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
+        !isa<ScalableVectorType>(II->getOperand(0)->getType()))
+      return false;
+    if (isa<CmpInst>(II->getOperand(0)))
+      Ops.push_back(&II->getOperandUse(0));
+    return true;
   };
 
   switch (I->getOpcode()) {
@@ -6244,7 +6251,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
     break;
   case Instruction::Select: {
-    if (!ShouldSinkCondition(I->getOperand(0)))
+    if (!ShouldSinkCondition(I->getOperand(0), Ops))
       return false;
 
     Ops.push_back(&I->getOperandUse(0));
@@ -6254,7 +6261,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     if (cast<BranchInst>(I)->isUnconditional())
       return false;
 
-    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
       return false;
 
     Ops.push_back(&I->getOperandUse(0));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 61189337e5233..e32e7a2f92832 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -589,14 +589,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setSchedulingPreference(Sched::RegPressure);
   setJumpIsExpensive(true);
 
-  // FIXME: This is only partially true. If we have to do vector compares, any
-  // SGPR pair can be a condition register. If we have a uniform condition, we
-  // are better off doing SALU operations, where there is only one SCC. For now,
-  // we don't have a way of knowing during instruction selection if a condition
-  // will be uniform and we always use vector compares. Assume we are using
-  // vector compares until that is fixed.
-  setHasMultipleConditionRegisters(true);
-
   setMinCmpXchgSizeInBits(32);
   setSupportsUnalignedAtomics(false);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 39bb0adfc1a17..fd5d5b8dec431 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -388,6 +388,16 @@ class AMDGPUTargetLowering : public TargetLowering {
   MVT getFenceOperandTy(const DataLayout &DL) const override {
     return MVT::i32;
   }
+
+  bool hasMultipleConditionRegisters(EVT VT) const override {
+    // FIXME: This is only partially true. If we have to do vector compares, any
+    // SGPR pair can be a condition register. If we have a uniform condition, we
+    // are better off doing SALU operations, where there is only one SCC. For
+    // now, we don't have a way of knowing during instruction selection if a
+    // condition will be uniform and we always use vector compares. Assume we
+    // are using vector compares until that is fixed.
+    return true;
+  }
 };
 
 namespace AMDGPUISD {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..99c50dc7e145f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1433,7 +1433,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // With 32 condition bits, we don't need to sink (and duplicate) compares
   // aggressively in CodeGenPrep.
   if (Subtarget.useCRBits()) {
-    setHasMultipleConditionRegisters();
     setJumpIsExpensive();
   }
 
@@ -19848,3 +19847,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   return Builder.CreateOr(
       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
 }
+
+bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
+  return Subtarget.useCRBits();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c7116dc3b5..9755f0e272d16 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1207,6 +1207,8 @@ namespace llvm {
                              bool IsVarArg) const;
     bool supportsTailCallFor(const CallBase *CB) const;
 
+    bool hasMultipleConditionRegisters(EVT VT) const override;
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
diff --git a/llvm/test/Transforms/CodeGenPrepare/dont-sink-scalable-vector-compare.ll b/llvm/test/Transforms/CodeGenPrepare/dont-sink-scalable-vector-compare.ll
new file mode 100644
index 0000000000000..26de31d6f7499
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/dont-sink-scalable-vector-compare.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @do_not_sink_scalable_vector_compare(ptr %a, ptr %b) #0 {
+; CHECK-LABEL: define void @do_not_sink_scalable_vector_compare(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[STEP_VECTOR:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <vscale x 4 x i32> [[STEP_VECTOR]], splat (i32 16)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[SRC]], i32 4, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[DST:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_LOAD]], ptr [[DST]], i32 4, <vscale x 4 x i1> [[TMP0]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[VECTOR_END:.*]], label %[[VECTOR_BODY]]
+; CHECK:       [[VECTOR_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %step.vector = call <vscale x 4 x i32> @llvm.stepvector()
+  %mask = icmp ult <vscale x 4 x i32> %step.vector, splat (i32 16)
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %src = getelementptr inbounds ptr, ptr %a, i64 %index
+  %wide.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %src, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> poison)
+  %dst = getelementptr inbounds ptr, ptr %b, i64 %index
+  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %wide.load, ptr %dst, i32 4, <vscale x 4 x i1> %mask)
+  %index.next = add nuw i64 %index, 16
+  %exit.cond = icmp eq i64 %index.next, 1024
+  br i1 %exit.cond, label %vector.end, label %vector.body
+
+vector.end:
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
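
---

Note for out-of-tree targets: `setHasMultipleConditionRegisters()` is removed by this patch, and CodeGenPrepare now queries the virtual hook with the comparison's type (`EVT::getEVT(Cmp->getType())`). A minimal migration sketch follows; the `MyTargetLowering` class is hypothetical and not part of this patch:

```cpp
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Hypothetical downstream target lowering, shown only to illustrate the
// migration from the removed boolean flag to the new virtual hook.
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

  // Old behaviour: setHasMultipleConditionRegisters(true) suppressed compare
  // sinking for every type. Returning true unconditionally reproduces that.
  bool hasMultipleConditionRegisters(EVT VT) const override { return true; }
};
```

The per-type parameter is what lets AArch64 above keep the old sinking behaviour for scalar compares while opting out only for scalable vectors (`VT.isScalableVector()`), with the compare sinking for reductions handled instead by `isProfitableToSinkOperands`.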