Skip to content

Commit 94d374a

Browse files
[LLVM][CGP] Allow finer control for sinking compares. (#151366)
Compare sinking is currently gated on the result of hasMultipleConditionRegisters. This function is too coarse-grained because it does not distinguish between scalar and vector compares. This PR extends the interface to take an EVT, allowing finer control. The new interface is used by AArch64 to disable sinking of scalable vector compares, with isProfitableToSinkOperands updated so the specifically tested cases are maintained.
1 parent 11eeb4d commit 94d374a

File tree

10 files changed

+84
-37
lines changed

10 files changed

+84
-37
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -518,10 +518,12 @@ class LLVM_ABI TargetLoweringBase {
518518
return true;
519519
}
520520

521-
/// Return true if multiple condition registers are available.
522-
bool hasMultipleConditionRegisters() const {
523-
return HasMultipleConditionRegisters;
524-
}
521+
/// Does the target have multiple (allocatable) condition registers that
522+
/// can be used to store the results of comparisons for use by selects
523+
/// and conditional branches. With multiple condition registers, the code
524+
/// generator will not aggressively sink comparisons into the blocks of their
525+
/// users.
526+
virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
525527

526528
/// Return true if the target has BitExtract instructions.
527529
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2453,7 +2455,7 @@ class LLVM_ABI TargetLoweringBase {
24532455
EVT VT) const {
24542456
// If a target has multiple condition registers, then it likely has logical
24552457
// operations on those registers.
2456-
if (hasMultipleConditionRegisters())
2458+
if (hasMultipleConditionRegisters(VT))
24572459
return false;
24582460
// Only do the transform if the value won't be split into multiple
24592461
// registers.
@@ -2560,15 +2562,6 @@ class LLVM_ABI TargetLoweringBase {
25602562
StackPointerRegisterToSaveRestore = R;
25612563
}
25622564

2563-
/// Tells the code generator that the target has multiple (allocatable)
2564-
/// condition registers that can be used to store the results of comparisons
2565-
/// for use by selects and conditional branches. With multiple condition
2566-
/// registers, the code generator will not aggressively sink comparisons into
2567-
/// the blocks of their users.
2568-
void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2569-
HasMultipleConditionRegisters = hasManyRegs;
2570-
}
2571-
25722565
/// Tells the code generator that the target has BitExtract instructions.
25732566
/// The code generator will aggressively sink "shift"s into the blocks of
25742567
/// their users if the users will generate "and" instructions which can be
@@ -3604,13 +3597,6 @@ class LLVM_ABI TargetLoweringBase {
36043597
private:
36053598
const TargetMachine &TM;
36063599

3607-
/// Tells the code generator that the target has multiple (allocatable)
3608-
/// condition registers that can be used to store the results of comparisons
3609-
/// for use by selects and conditional branches. With multiple condition
3610-
/// registers, the code generator will not aggressively sink comparisons into
3611-
/// the blocks of their users.
3612-
bool HasMultipleConditionRegisters;
3613-
36143600
/// Tells the code generator that the target has BitExtract instructions.
36153601
/// The code generator will aggressively sink "shift"s into the blocks of
36163602
/// their users if the users will generate "and" instructions which can be

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
18341834
///
18351835
/// Return true if any changes are made.
18361836
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1837-
if (TLI.hasMultipleConditionRegisters())
1837+
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
18381838
return false;
18391839

18401840
// Avoid sinking soft-FP comparisons, since this can move them into a loop.

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
697697
MaxGluedStoresPerMemcpy = 0;
698698
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
699699
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
700-
HasMultipleConditionRegisters = false;
701700
HasExtractBitsInsn = false;
702701
JumpIsExpensive = JumpIsExpensiveOverride;
703702
PredictableSelectIsExpensive = false;

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,10 @@ class AArch64TargetLowering : public TargetLowering {
887887
bool shouldScalarizeBinop(SDValue VecOp) const override {
888888
return VecOp.getOpcode() == ISD::SETCC;
889889
}
890+
891+
bool hasMultipleConditionRegisters(EVT VT) const override {
892+
return VT.isScalableVector();
893+
}
890894
};
891895

892896
namespace AArch64 {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6284,10 +6284,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
62846284
}
62856285
}
62866286

6287-
auto ShouldSinkCondition = [](Value *Cond) -> bool {
6287+
auto ShouldSinkCondition = [](Value *Cond,
6288+
SmallVectorImpl<Use *> &Ops) -> bool {
6289+
if (!isa<IntrinsicInst>(Cond))
6290+
return false;
62886291
auto *II = dyn_cast<IntrinsicInst>(Cond);
6289-
return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
6290-
isa<ScalableVectorType>(II->getOperand(0)->getType());
6292+
if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6293+
!isa<ScalableVectorType>(II->getOperand(0)->getType()))
6294+
return false;
6295+
if (isa<CmpInst>(II->getOperand(0)))
6296+
Ops.push_back(&II->getOperandUse(0));
6297+
return true;
62916298
};
62926299

62936300
switch (I->getOpcode()) {
@@ -6303,7 +6310,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
63036310
}
63046311
break;
63056312
case Instruction::Select: {
6306-
if (!ShouldSinkCondition(I->getOperand(0)))
6313+
if (!ShouldSinkCondition(I->getOperand(0), Ops))
63076314
return false;
63086315

63096316
Ops.push_back(&I->getOperandUse(0));
@@ -6313,7 +6320,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
63136320
if (cast<BranchInst>(I)->isUnconditional())
63146321
return false;
63156322

6316-
if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
6323+
if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
63176324
return false;
63186325

63196326
Ops.push_back(&I->getOperandUse(0));

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -601,14 +601,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
601601
setSchedulingPreference(Sched::RegPressure);
602602
setJumpIsExpensive(true);
603603

604-
// FIXME: This is only partially true. If we have to do vector compares, any
605-
// SGPR pair can be a condition register. If we have a uniform condition, we
606-
// are better off doing SALU operations, where there is only one SCC. For now,
607-
// we don't have a way of knowing during instruction selection if a condition
608-
// will be uniform and we always use vector compares. Assume we are using
609-
// vector compares until that is fixed.
610-
setHasMultipleConditionRegisters(true);
611-
612604
setMinCmpXchgSizeInBits(32);
613605
setSupportsUnalignedAtomics(false);
614606

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,16 @@ class AMDGPUTargetLowering : public TargetLowering {
388388
MVT getFenceOperandTy(const DataLayout &DL) const override {
389389
return MVT::i32;
390390
}
391+
392+
bool hasMultipleConditionRegisters(EVT VT) const override {
393+
// FIXME: This is only partially true. If we have to do vector compares, any
394+
// SGPR pair can be a condition register. If we have a uniform condition, we
395+
// are better off doing SALU operations, where there is only one SCC. For
396+
// now, we don't have a way of knowing during instruction selection if a
397+
// condition will be uniform and we always use vector compares. Assume we
398+
// are using vector compares until that is fixed.
399+
return true;
400+
}
391401
};
392402

393403
namespace AMDGPUISD {

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1433,7 +1433,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
14331433
// With 32 condition bits, we don't need to sink (and duplicate) compares
14341434
// aggressively in CodeGenPrep.
14351435
if (Subtarget.useCRBits()) {
1436-
setHasMultipleConditionRegisters();
14371436
setJumpIsExpensive();
14381437
}
14391438

@@ -19856,3 +19855,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1985619855
return Builder.CreateOr(
1985719856
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
1985819857
}
19858+
19859+
bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
19860+
return Subtarget.useCRBits();
19861+
}

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,8 @@ namespace llvm {
12071207
bool IsVarArg) const;
12081208
bool supportsTailCallFor(const CallBase *CB) const;
12091209

1210+
bool hasMultipleConditionRegisters(EVT VT) const override;
1211+
12101212
private:
12111213
struct ReuseLoadInfo {
12121214
SDValue Ptr;
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
define void @do_not_sink_scalable_vector_compare(ptr %a, ptr %b) #0 {
7+
; CHECK-LABEL: define void @do_not_sink_scalable_vector_compare(
8+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: [[ENTRY:.*]]:
10+
; CHECK-NEXT: [[STEP_VECTOR:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
11+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <vscale x 4 x i32> [[STEP_VECTOR]], splat (i32 16)
12+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13+
; CHECK: [[VECTOR_BODY]]:
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[INDEX]]
16+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[SRC]], i32 4, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> poison)
17+
; CHECK-NEXT: [[DST:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 [[INDEX]]
18+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_LOAD]], ptr [[DST]], i32 4, <vscale x 4 x i1> [[TMP0]])
19+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
20+
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
21+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[VECTOR_END:.*]], label %[[VECTOR_BODY]]
22+
; CHECK: [[VECTOR_END]]:
23+
; CHECK-NEXT: ret void
24+
;
25+
entry:
26+
%step.vector = call <vscale x 4 x i32> @llvm.stepvector()
27+
%mask = icmp ult <vscale x 4 x i32> %step.vector, splat (i32 16)
28+
br label %vector.body
29+
30+
vector.body:
31+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
32+
%src = getelementptr inbounds ptr, ptr %a, i64 %index
33+
%wide.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %src, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> poison)
34+
%dst = getelementptr inbounds ptr, ptr %b, i64 %index
35+
call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %wide.load, ptr %dst, i32 4, <vscale x 4 x i1> %mask)
36+
%index.next = add nuw i64 %index, 16
37+
%exit.cond = icmp eq i64 %index.next, 1024
38+
br i1 %exit.cond, label %vector.end, label %vector.body
39+
40+
vector.end:
41+
ret void
42+
}
43+
44+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)