[LLVM][CGP] Allow finer control for sinking compares. #151366

Merged 3 commits on Aug 5, 2025

28 changes: 7 additions & 21 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -518,10 +518,12 @@ class LLVM_ABI TargetLoweringBase {
     return true;
   }
 
-  /// Return true if multiple condition registers are available.
-  bool hasMultipleConditionRegisters() const {
-    return HasMultipleConditionRegisters;
-  }
+  /// Does the target have multiple (allocatable) condition registers that
+  /// can be used to store the results of comparisons for use by selects
+  /// and conditional branches. With multiple condition registers, the code
+  /// generator will not aggressively sink comparisons into the blocks of their
+  /// users.
+  virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
 
   /// Return true if the target has BitExtract instructions.
   bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2453,7 +2455,7 @@ class LLVM_ABI TargetLoweringBase {
                                      EVT VT) const {
     // If a target has multiple condition registers, then it likely has logical
     // operations on those registers.
-    if (hasMultipleConditionRegisters())
+    if (hasMultipleConditionRegisters(VT))
       return false;
     // Only do the transform if the value won't be split into multiple
     // registers.
@@ -2560,15 +2562,6 @@ class LLVM_ABI TargetLoweringBase {
     StackPointerRegisterToSaveRestore = R;
   }
 
-  /// Tells the code generator that the target has multiple (allocatable)
-  /// condition registers that can be used to store the results of comparisons
-  /// for use by selects and conditional branches. With multiple condition
-  /// registers, the code generator will not aggressively sink comparisons into
-  /// the blocks of their users.
-  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
-    HasMultipleConditionRegisters = hasManyRegs;
-  }
-
   /// Tells the code generator that the target has BitExtract instructions.
   /// The code generator will aggressively sink "shift"s into the blocks of
   /// their users if the users will generate "and" instructions which can be
@@ -3604,13 +3597,6 @@ class LLVM_ABI TargetLoweringBase {
 private:
   const TargetMachine &TM;
 
-  /// Tells the code generator that the target has multiple (allocatable)
-  /// condition registers that can be used to store the results of comparisons
-  /// for use by selects and conditional branches. With multiple condition
-  /// registers, the code generator will not aggressively sink comparisons into
-  /// the blocks of their users.
-  bool HasMultipleConditionRegisters;
-
   /// Tells the code generator that the target has BitExtract instructions.
   /// The code generator will aggressively sink "shift"s into the blocks of
   /// their users if the users will generate "and" instructions which can be
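Note on usage: the hook is now a per-type virtual query instead of a constructor-time flag, so a target can decline compare sinking for exactly the value types whose comparison results land in plentiful condition registers. Below is a minimal sketch of an override, assuming a hypothetical MyTargetLowering class; the vector-only policy is illustrative, not part of this patch.

#include "llvm/CodeGen/TargetLowering.h"

namespace {
// Hypothetical target: vector compares write allocatable mask registers, so
// report multiple condition registers for vector types only; scalar compares
// keep being sunk through the single flags register as before.
class MyTargetLowering : public llvm::TargetLowering {
public:
  explicit MyTargetLowering(const llvm::TargetMachine &TM)
      : TargetLowering(TM) {}

  bool hasMultipleConditionRegisters(llvm::EVT VT) const override {
    return VT.isVector();
  }
};
} // end anonymous namespace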
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
 ///
 /// Return true if any changes are made.
 static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
-  if (TLI.hasMultipleConditionRegisters())
+  if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
     return false;
 
   // Avoid sinking soft-FP comparisons, since this can move them into a loop.
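To see what the rewritten guard evaluates, consider an SVE-style compare. This is a simplified sketch of the hunk above, with Cmp and TLI as in sinkCmpExpression:

// Given:  %mask = icmp ult <vscale x 4 x i32> %a, %b
// Cmp->getType() is <vscale x 4 x i1>, so the derived EVT is a scalable
// vector; a target returning true for such a type keeps the compare in its
// defining block rather than having it duplicated into every user block.
EVT VT = EVT::getEVT(Cmp->getType());
if (TLI.hasMultipleConditionRegisters(VT))
  return false; // compare is not sunk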
1 change: 0 additions & 1 deletion llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
   MaxGluedStoresPerMemcpy = 0;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
       MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
-  HasMultipleConditionRegisters = false;
   HasExtractBitsInsn = false;
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -887,6 +887,10 @@ class AArch64TargetLowering : public TargetLowering {
   bool shouldScalarizeBinop(SDValue VecOp) const override {
     return VecOp.getOpcode() == ISD::SETCC;
   }
+
+  bool hasMultipleConditionRegisters(EVT VT) const override {
+    return VT.isScalableVector();
+  }
 };
 
 namespace AArch64 {
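AArch64 keys the override on scalability: SVE compares define predicate registers (p0 through p15, all allocatable), so re-materialising a compare in each user block buys nothing. A standalone sketch of how a few EVTs classify under this check (illustrative only; assumes llvm/CodeGen/ValueTypes.h, llvm/IR/LLVMContext.h, and <cassert> are included):

llvm::LLVMContext Ctx;
// <vscale x 4 x i1>: scalable, so the override returns true and
// CodeGenPrepare leaves the compare where it is.
llvm::EVT ScalableMask =
    llvm::EVT::getVectorVT(Ctx, llvm::MVT::i1, 4, /*IsScalable=*/true);
// <4 x i1>: fixed width, so compare sinking proceeds as before.
llvm::EVT FixedMask = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i1, 4);
assert(ScalableMask.isScalableVector() && !FixedMask.isScalableVector());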
17 changes: 12 additions & 5 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -6225,10 +6225,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
   }
 
-  auto ShouldSinkCondition = [](Value *Cond) -> bool {
+  auto ShouldSinkCondition = [](Value *Cond,
+                                SmallVectorImpl<Use *> &Ops) -> bool {
+    if (!isa<IntrinsicInst>(Cond))
+      return false;
     auto *II = dyn_cast<IntrinsicInst>(Cond);
-    return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
-           isa<ScalableVectorType>(II->getOperand(0)->getType());
+    if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
+        !isa<ScalableVectorType>(II->getOperand(0)->getType()))
+      return false;
+    if (isa<CmpInst>(II->getOperand(0)))
+      Ops.push_back(&II->getOperandUse(0));
+    return true;
   };
 
   switch (I->getOpcode()) {
@@ -6244,7 +6251,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
     break;
   case Instruction::Select: {
-    if (!ShouldSinkCondition(I->getOperand(0)))
+    if (!ShouldSinkCondition(I->getOperand(0), Ops))
       return false;
 
     Ops.push_back(&I->getOperandUse(0));
@@ -6254,7 +6261,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     if (cast<BranchInst>(I)->isUnconditional())
      return false;
 
-    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
      return false;
 
    Ops.push_back(&I->getOperandUse(0));
8 changes: 0 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -589,14 +589,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setSchedulingPreference(Sched::RegPressure);
   setJumpIsExpensive(true);
 
-  // FIXME: This is only partially true. If we have to do vector compares, any
-  // SGPR pair can be a condition register. If we have a uniform condition, we
-  // are better off doing SALU operations, where there is only one SCC. For now,
-  // we don't have a way of knowing during instruction selection if a condition
-  // will be uniform and we always use vector compares. Assume we are using
-  // vector compares until that is fixed.
-  setHasMultipleConditionRegisters(true);
-
   setMinCmpXchgSizeInBits(32);
   setSupportsUnalignedAtomics(false);
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -388,6 +388,16 @@ class AMDGPUTargetLowering : public TargetLowering {
   MVT getFenceOperandTy(const DataLayout &DL) const override {
     return MVT::i32;
   }
+
+  bool hasMultipleConditionRegisters(EVT VT) const override {
+    // FIXME: This is only partially true. If we have to do vector compares, any
+    // SGPR pair can be a condition register. If we have a uniform condition, we
+    // are better off doing SALU operations, where there is only one SCC. For
+    // now, we don't have a way of knowing during instruction selection if a
+    // condition will be uniform and we always use vector compares. Assume we
+    // are using vector compares until that is fixed.
+    return true;
+  }
 };
 
 namespace AMDGPUISD {
5 changes: 4 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1433,7 +1433,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // With 32 condition bits, we don't need to sink (and duplicate) compares
   // aggressively in CodeGenPrep.
   if (Subtarget.useCRBits()) {
-    setHasMultipleConditionRegisters();
     setJumpIsExpensive();
   }
 
@@ -19848,3 +19847,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   return Builder.CreateOr(
       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
 }
+
+bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
+  return Subtarget.useCRBits();
+}
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1207,6 +1207,8 @@ namespace llvm {
                              bool IsVarArg) const;
     bool supportsTailCallFor(const CallBase *CB) const;
 
+    bool hasMultipleConditionRegisters(EVT VT) const override;
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define void @do_not_sink_scalable_vector_compare(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define void @do_not_sink_scalable_vector_compare(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[STEP_VECTOR:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <vscale x 4 x i32> [[STEP_VECTOR]], splat (i32 16)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[SRC]], i32 4, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[DST:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_LOAD]], ptr [[DST]], i32 4, <vscale x 4 x i1> [[TMP0]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[VECTOR_END:.*]], label %[[VECTOR_BODY]]
; CHECK: [[VECTOR_END]]:
; CHECK-NEXT: ret void
;
entry:
%step.vector = call <vscale x 4 x i32> @llvm.stepvector()
%mask = icmp ult <vscale x 4 x i32> %step.vector, splat (i32 16)
br label %vector.body

vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%src = getelementptr inbounds ptr, ptr %a, i64 %index
%wide.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %src, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> poison)
%dst = getelementptr inbounds ptr, ptr %b, i64 %index
call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %wide.load, ptr %dst, i32 4, <vscale x 4 x i1> %mask)
%index.next = add nuw i64 %index, 16
%exit.cond = icmp eq i64 %index.next, 1024
br i1 %exit.cond, label %vector.end, label %vector.body

vector.end:
ret void
}

attributes #0 = { "target-features"="+sve" }