Skip to content

Commit c9714d2

Browse files
authored
[RISCV] Add profitability checks to SelectAddrRegRegScale. (#150135)
- Only fold if the ADD can be folded into all uses. - Don't reassociate an ADDI if the shl+add can be a shxadd or similar instruction. - Only reassociate a single ADDI. If there are 2 ADDIs, it's the same number of instructions as shl+add. If there are more than 2, then it would increase the number of instructions compared to folding the ADDIs into the loads/stores.
1 parent 23eef9a commit c9714d2

File tree

3 files changed

+99
-34
lines changed

3 files changed

+99
-34
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3032,6 +3032,63 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
30323032
return true;
30333033
}
30343034

3035+
/// Return true if this a load/store that we have a RegRegScale instruction for.
3036+
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3037+
const RISCVSubtarget &Subtarget) {
3038+
if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3039+
return false;
3040+
EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3041+
if (!(VT.isScalarInteger() &&
3042+
(Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3043+
!((VT == MVT::f32 || VT == MVT::f64) &&
3044+
Subtarget.hasVendorXTHeadFMemIdx()))
3045+
return false;
3046+
// Don't allow stores of the value. It must be used as the address.
3047+
if (User->getOpcode() == ISD::STORE &&
3048+
cast<StoreSDNode>(User)->getValue() == Add)
3049+
return false;
3050+
3051+
return true;
3052+
}
3053+
3054+
/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3055+
/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3056+
/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3057+
/// single addi and we don't have a SHXADD instruction we could use.
3058+
/// FIXME: May still need to check how many and what kind of users the SHL has.
3059+
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3060+
SDValue Add,
3061+
SDValue Shift = SDValue()) {
3062+
bool FoundADDI = false;
3063+
for (auto *User : Add->users()) {
3064+
if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3065+
continue;
3066+
3067+
// Allow a single ADDI that is used by loads/stores if we matched a shift.
3068+
if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3069+
!isa<ConstantSDNode>(User->getOperand(1)) ||
3070+
!isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3071+
return false;
3072+
3073+
FoundADDI = true;
3074+
3075+
// If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3076+
assert(Shift.getOpcode() == ISD::SHL);
3077+
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3078+
if ((ShiftAmt <= 3 &&
3079+
(Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
3080+
(ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
3081+
return false;
3082+
3083+
// All users of the ADDI should be load/store.
3084+
for (auto *ADDIUser : User->users())
3085+
if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3086+
return false;
3087+
}
3088+
3089+
return true;
3090+
}
3091+
30353092
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
30363093
unsigned MaxShiftAmount,
30373094
SDValue &Base, SDValue &Index,
@@ -3062,7 +3119,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
30623119
if (LHS.getOpcode() == ISD::ADD &&
30633120
!isa<ConstantSDNode>(LHS.getOperand(1)) &&
30643121
isInt<12>(C1->getSExtValue())) {
3065-
if (SelectShl(LHS.getOperand(1), Index, Scale)) {
3122+
if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3123+
isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
30663124
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
30673125
SDLoc(Addr), VT);
30683126
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3072,7 +3130,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
30723130
}
30733131

30743132
// Add is commutative so we need to check both operands.
3075-
if (SelectShl(LHS.getOperand(0), Index, Scale)) {
3133+
if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3134+
isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
30763135
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
30773136
SDLoc(Addr), VT);
30783137
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3090,16 +3149,23 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
30903149

30913150
// Try to match a shift on the RHS.
30923151
if (SelectShl(RHS, Index, Scale)) {
3152+
if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3153+
return false;
30933154
Base = LHS;
30943155
return true;
30953156
}
30963157

30973158
// Try to match a shift on the LHS.
30983159
if (SelectShl(LHS, Index, Scale)) {
3160+
if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3161+
return false;
30993162
Base = RHS;
31003163
return true;
31013164
}
31023165

3166+
if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3167+
return false;
3168+
31033169
Base = LHS;
31043170
Index = RHS;
31053171
Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);

llvm/test/CodeGen/RISCV/xqcisls.ll

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -308,13 +308,13 @@ define i64 @lrd(ptr %a, i32 %b) {
308308
;
309309
; RV32IZBAXQCISLS-LABEL: lrd:
310310
; RV32IZBAXQCISLS: # %bb.0:
311-
; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a0, a1, 3
312-
; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
313-
; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
314-
; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
315-
; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
316-
; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
317-
; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
311+
; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
312+
; RV32IZBAXQCISLS-NEXT: lw a1, 0(a0)
313+
; RV32IZBAXQCISLS-NEXT: lw a2, 4(a0)
314+
; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
315+
; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
316+
; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
317+
; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
318318
; RV32IZBAXQCISLS-NEXT: ret
319319
%1 = getelementptr i64, ptr %a, i32 %b
320320
%2 = load i64, ptr %1, align 8
@@ -348,14 +348,13 @@ define i64 @lrd_2(ptr %a, i32 %b) {
348348
;
349349
; RV32IZBAXQCISLS-LABEL: lrd_2:
350350
; RV32IZBAXQCISLS: # %bb.0:
351-
; RV32IZBAXQCISLS-NEXT: addi a2, a0, 96
352-
; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a2, a1, 3
353-
; RV32IZBAXQCISLS-NEXT: addi a0, a0, 100
354-
; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
355-
; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
356-
; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
357-
; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
358-
; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
351+
; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
352+
; RV32IZBAXQCISLS-NEXT: lw a1, 96(a0)
353+
; RV32IZBAXQCISLS-NEXT: lw a2, 100(a0)
354+
; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
355+
; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
356+
; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
357+
; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
359358
; RV32IZBAXQCISLS-NEXT: ret
360359
%1 = add i32 %b, 12
361360
%2 = getelementptr i64, ptr %a, i32 %1
@@ -472,11 +471,11 @@ define void @srd(ptr %a, i32 %b, i64 %c) {
472471
; RV32IZBAXQCISLS: # %bb.0:
473472
; RV32IZBAXQCISLS-NEXT: add a4, a2, a2
474473
; RV32IZBAXQCISLS-NEXT: add a3, a3, a3
475-
; RV32IZBAXQCISLS-NEXT: sltu a2, a4, a2
476-
; RV32IZBAXQCISLS-NEXT: qc.srw a4, a0, a1, 3
477-
; RV32IZBAXQCISLS-NEXT: add a2, a3, a2
478-
; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
479-
; RV32IZBAXQCISLS-NEXT: qc.srw a2, a0, a1, 3
474+
; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
475+
; RV32IZBAXQCISLS-NEXT: sltu a1, a4, a2
476+
; RV32IZBAXQCISLS-NEXT: add a1, a3, a1
477+
; RV32IZBAXQCISLS-NEXT: sw a4, 0(a0)
478+
; RV32IZBAXQCISLS-NEXT: sw a1, 4(a0)
480479
; RV32IZBAXQCISLS-NEXT: ret
481480
%1 = add i64 %c, %c
482481
%2 = getelementptr i64, ptr %a, i32 %b
@@ -503,10 +502,10 @@ define i64 @lrd_large_shift(ptr %a, i32 %b) {
503502
;
504503
; RV32IZBAXQCISLS-LABEL: lrd_large_shift:
505504
; RV32IZBAXQCISLS: # %bb.0:
506-
; RV32IZBAXQCISLS-NEXT: addi a2, a0, 384
507-
; RV32IZBAXQCISLS-NEXT: addi a3, a0, 388
508-
; RV32IZBAXQCISLS-NEXT: qc.lrw a0, a2, a1, 5
509-
; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a3, a1, 5
505+
; RV32IZBAXQCISLS-NEXT: slli a1, a1, 5
506+
; RV32IZBAXQCISLS-NEXT: add a1, a1, a0
507+
; RV32IZBAXQCISLS-NEXT: lw a0, 384(a1)
508+
; RV32IZBAXQCISLS-NEXT: lw a1, 388(a1)
510509
; RV32IZBAXQCISLS-NEXT: ret
511510
%1 = add i32 %b, 12
512511
%2 = shl i32 %1, 2

llvm/test/CodeGen/RISCV/xtheadmemidx.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -704,14 +704,14 @@ define i64 @lrd(ptr %a, iXLen %b) {
704704
define i64 @lrd_2(ptr %a, iXLen %b) {
705705
; RV32XTHEADMEMIDX-LABEL: lrd_2:
706706
; RV32XTHEADMEMIDX: # %bb.0:
707-
; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
708-
; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
709-
; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100
710-
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
711-
; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2
712-
; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2
713-
; RV32XTHEADMEMIDX-NEXT: add a1, a1, a1
714-
; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
707+
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
708+
; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0
709+
; RV32XTHEADMEMIDX-NEXT: lw a1, 96(a0)
710+
; RV32XTHEADMEMIDX-NEXT: lw a2, 100(a0)
711+
; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
712+
; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
713+
; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
714+
; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
715715
; RV32XTHEADMEMIDX-NEXT: ret
716716
;
717717
; RV64XTHEADMEMIDX-LABEL: lrd_2:

0 commit comments

Comments
 (0)