From 457164ab6fac76050e9861e8a307cfce05ccc507 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Sun, 20 Jul 2025 19:59:42 +0800 Subject: [PATCH 1/4] [RISCV] Add TSFlag for reading past VL behaviour. NFCI Currently we have a switch statement that checks if a vector instruction may read elements past VL. However it currently doesn't account for instructions in vendor extensions. Handling all possible vendor instructions will result in quite a lot of opcodes being added, so I've created a new TSFlag that we can declare in TableGen, and added it to the existing instruction definitions. I've tried to be as conservative as possible here: All SiFive vendor vector instructions should be covered by the flag, as well as all of XRivosVizip, and ri.vextract from XRivosVisni. For now this should be NFC because coincidentally, these instructions aren't handled in getOperandInfo, so RISCVVLOptimizer should currently avoid touching them despite them being liberally handled in getMinimumVLForUser. However in an upcoming patch we'll need to also bail in getMinimumVLForUser, so this prepares for it. 
--- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 9 ++++++ llvm/lib/Target/RISCV/RISCVInstrFormats.td | 6 ++++ llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 9 +++--- llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td | 3 +- llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 2 ++ llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td | 5 +++ llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 31 ++----------------- 7 files changed, 31 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index bddea43fbb09c..9d26fc01bf379 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -139,6 +139,9 @@ enum { // 3 -> SEW * 4 DestEEWShift = ElementsDependOnMaskShift + 1, DestEEWMask = 3ULL << DestEEWShift, + + ReadsPastVLShift = DestEEWShift + 2, + ReadsPastVLMask = 1ULL << ReadsPastVLShift, }; // Helper functions to read TSFlags. @@ -195,6 +198,12 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) { return TSFlags & ElementsDependOnMaskMask; } +/// \returns true if the instruction may read elements past VL, e.g. 
+/// vslidedown/vrgather +static inline bool readsPastVL(uint64_t TSFlags) { + return TSFlags & ReadsPastVLMask; +} + static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; // This method is only called if we expect to have a VL operand, and all diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index d9c6101478064..878a0ec938919 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -261,6 +261,12 @@ class RVInstCommon; defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp +let ReadsPastVL = 1 in defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>; -let ElementsDependOn = EltDepsVL in +let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>; } // Predicates = [HasVInstructions] @@ -1712,19 +1713,19 @@ let Predicates = [HasVInstructionsAnyF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -let ElementsDependOn = EltDepsVL in +let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>; } // Predicates = [HasVInstructionsAnyF] let Predicates = [HasVInstructions] in { // Vector Register Gather Instruction -let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { +let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, ReadsPastVL = 1 in { defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>; def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, SchedBinaryMC<"WriteVRGatherEI16VV", "ReadVRGatherEI16VV_data", "ReadVRGatherEI16VV_index">; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather +} // Constraints = "@earlyclobber $vd", RVVConstraint = 
Vrgather, ReadsPastVL = 1 // Vector Compress Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress, ElementsDependOn = EltDepsVLMask in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index ebcf079f300b3..3a6ce3ce1d469 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -58,7 +58,7 @@ class CustomRivosXVI funct6, RISCVVFormat opv, dag outs, dag ins, let Predicates = [HasVendorXRivosVizip], DecoderNamespace = "XRivos", Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, - Inst<6-0> = OPC_CUSTOM_2.Value in { + Inst<6-0> = OPC_CUSTOM_2.Value, ReadsPastVL = 1 in { defm RI_VZIPEVEN_V : VALU_IV_V<"ri.vzipeven", 0b001100>; defm RI_VZIPODD_V : VALU_IV_V<"ri.vzipodd", 0b011100>; defm RI_VZIP2A_V : VALU_IV_V<"ri.vzip2a", 0b000100>; @@ -126,6 +126,7 @@ def RI_VINSERT : CustomRivosVXI<0b010000, OPMVX, (outs VR:$vd_wb), (ins VR:$vd, GPR:$rs1, uimm5:$imm), "ri.vinsert.v.x", "$vd, $rs1, $imm">; +let ReadsPastVL = 1 in def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd), (ins VR:$vs2, uimm5:$imm), "ri.vextract.x.v", "$rd, $vs2, $imm">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index a47dfe363c21e..b546339ce99e2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -74,6 +74,7 @@ class RVInstVCCustom2 funct6_hi4, bits<3> funct3, dag outs, dag ins, let Uses = [VL, VTYPE]; let RVVConstraint = NoConstraint; let ElementsDependOn = EltDepsVLMask; + let ReadsPastVL = 1; } class RVInstVCFCustom2 funct6_hi4, bits<3> funct3, dag outs, dag ins, @@ -98,6 +99,7 @@ class RVInstVCFCustom2 funct6_hi4, bits<3> funct3, dag outs, dag ins, let Uses = [VL, VTYPE]; let RVVConstraint = NoConstraint; let ElementsDependOn = EltDepsVLMask; + let ReadsPastVL = 1; } class VCIXInfo nf, RISCVOpcode opcode, let Inst{6-0} = 
opcode.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in @@ -94,6 +95,7 @@ class SFInstTileMoveOp funct6, dag outs, dag ins, string opcodestr, let Inst{6-0} = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -113,6 +115,7 @@ class SFInstMatmulF let Inst{6-0} = OPC_OP_VE.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -135,6 +138,7 @@ class SFInstMatmulF8 { diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index c9464515d2e56..7295b6e0e5b30 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1210,34 +1210,6 @@ static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO) { } } -/// Return true if MI may read elements past VL. -static bool mayReadPastVL(const MachineInstr &MI) { - const RISCVVPseudosTable::PseudoInfo *RVV = - RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); - if (!RVV) - return true; - - switch (RVV->BaseInstr) { - // vslidedown instructions may read elements past VL. They are handled - // according to current tail policy. - case RISCV::VSLIDEDOWN_VI: - case RISCV::VSLIDEDOWN_VX: - case RISCV::VSLIDE1DOWN_VX: - case RISCV::VFSLIDE1DOWN_VF: - - // vrgather instructions may read the source vector at any index < VLMAX, - // regardless of VL. 
- case RISCV::VRGATHER_VI: - case RISCV::VRGATHER_VV: - case RISCV::VRGATHER_VX: - case RISCV::VRGATHEREI16_VV: - return true; - - default: - return false; - } -} - bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { const MCInstrDesc &Desc = MI.getDesc(); if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) @@ -1298,7 +1270,8 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { return std::nullopt; } - if (mayReadPastVL(UserMI)) { + if (RISCVII::readsPastVL( + TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) { LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); return std::nullopt; } From 8609ce9dffe09eb714afeb027be1054db8e1f42d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 30 Jul 2025 14:20:22 +0800 Subject: [PATCH 2/4] Precommit tests --- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 53 +++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 71 +++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index cd282c265ae47..8651eaad4ef28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -198,3 +198,56 @@ define void @fadd_fcmp_select_copy( %v, %c call void @llvm.riscv.vsm( %select, ptr %p, iXLen %vl) ret void } + +define void @recurrence( %v, ptr %p, iXLen %n, iXLen %vl) { +; CHECK-LABEL: recurrence: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: .LBB13_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vadd.vv v10, v10, v8 +; CHECK-NEXT: bnez a1, .LBB13_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret +entry: + br label %loop +loop: + %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ] + %phi = phi [ 
zeroinitializer, %entry ], [ %x, %loop ] + %x = add %phi, %v + %iv.next = add iXLen %iv, 1 + %done = icmp eq iXLen %iv.next, %n + br i1 %done, label %exit, label %loop +exit: + call void @llvm.riscv.vse( %x, ptr %p, iXLen %vl) + ret void +} + +define @phi( %v, i1 %cond, iXLen %vl) { +; CHECK-LABEL: phi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: beqz a0, .LBB14_2 +; CHECK-NEXT: # %bb.1: # %foo +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB14_2: # %bar +; CHECK-NEXT: vadd.vi v8, v8, 2 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd( poison, %v, iXLen 1, iXLen -1) + br i1 %cond, label %foo, label %bar +foo: + %b = call @llvm.riscv.vadd( poison, %a, iXLen 1, iXLen 1) + ret %b +bar: + %c = call @llvm.riscv.vadd( poison, %a, iXLen 2, iXLen 2) + ret %c +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 60398cdf1db66..538b68a70908b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -603,4 +603,73 @@ body: | $x10 = COPY %9 PseudoRET implicit $x10 ... 
- +--- +name: recurrence +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: recurrence + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 + ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, -1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: BNE $noreg, $noreg, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */ + bb.0: + liveins: $x8 + %avl:gprnox0 = COPY $x8 + %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3, /* ta, ma */ + PseudoBR %bb.1 + bb.1: + %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, -1, 3 /* e8 */, 3 /* ta, ma */ + BNE $noreg, $noreg, %bb.1 + bb.2: + PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */ +... 
+--- +name: recurrence_cant_reduce +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: recurrence_cant_reduce + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8 + ; CHECK-NEXT: %avl2:gprnox0 = COPY $x8 + ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: BNE $noreg, $noreg, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */ + bb.0: + liveins: $x8, $x9 + %avl1:gprnox0 = COPY $x8 + %avl2:gprnox0 = COPY $x8 + %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3, /* ta, ma */ + PseudoBR %bb.1 + bb.1: + %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */ + BNE $noreg, $noreg, %bb.1 + bb.2: + PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */ +... 
From 2d71acb639d7c50344a836be6031c94c90e333e1 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 7 Feb 2025 11:37:56 +0800 Subject: [PATCH 3/4] [RISCV] Handle recurrences in RISCVVLOptimizer --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 132 +++++++++++------- .../CodeGen/RISCV/rvv/reproducer-pr146855.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll | 4 +- 5 files changed, 92 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 7295b6e0e5b30..40af9b04c97b6 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -30,6 +30,27 @@ using namespace llvm; namespace { +/// Wrapper around MachineOperand that defaults to immediate 0. +struct DemandedVL { + MachineOperand VL; + DemandedVL() : VL(MachineOperand::CreateImm(0)) {} + DemandedVL(MachineOperand VL) : VL(VL) {} + static DemandedVL vlmax() { + return DemandedVL(MachineOperand::CreateImm(RISCV::VLMaxSentinel)); + } + bool operator!=(const DemandedVL &Other) const { + return !VL.isIdenticalTo(Other.VL); + } +}; + +static DemandedVL max(const DemandedVL &LHS, const DemandedVL &RHS) { + if (RISCV::isVLKnownLE(LHS.VL, RHS.VL)) + return RHS; + if (RISCV::isVLKnownLE(RHS.VL, LHS.VL)) + return LHS; + return DemandedVL::vlmax(); +} + class RISCVVLOptimizer : public MachineFunctionPass { const MachineRegisterInfo *MRI; const MachineDominatorTree *MDT; @@ -51,17 +72,26 @@ class RISCVVLOptimizer : public MachineFunctionPass { StringRef getPassName() const override { return PASS_NAME; } private: - std::optional - getMinimumVLForUser(const MachineOperand &UserOp) const; - /// Returns the largest common VL MachineOperand that may be used to optimize - /// MI. Returns std::nullopt if it failed to find a suitable VL. 
- std::optional checkUsers(const MachineInstr &MI) const; + DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const; + /// Returns true if the users of \p MI have compatible EEWs and SEWs. + bool checkUsers(const MachineInstr &MI) const; bool tryReduceVL(MachineInstr &MI) const; bool isCandidate(const MachineInstr &MI) const; + void transfer(const MachineInstr &MI); + + /// Returns all uses of vector virtual registers. + auto vector_uses(const MachineInstr &MI) const { + auto Pred = [this](const MachineOperand &MO) -> bool { + return MO.isReg() && MO.getReg().isVirtual() && + RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg())); + }; + return make_filter_range(MI.uses(), Pred); + } /// For a given instruction, records what elements of it are demanded by /// downstream users. - DenseMap> DemandedVLs; + DenseMap DemandedVLs; + SetVector Worklist; }; /// Represents the EMUL and EEW of a MachineOperand. @@ -787,6 +817,9 @@ getOperandInfo(const MachineOperand &MO, const MachineRegisterInfo *MRI) { /// white-list approach simplifies this optimization for instructions that may /// have more complex semantics with relation to how it uses VL. 
static bool isSupportedInstr(const MachineInstr &MI) { + if (MI.isPHI() || MI.isFullCopy()) + return true; + const RISCVVPseudosTable::PseudoInfo *RVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); @@ -1259,21 +1292,24 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { return true; } -std::optional +DemandedVL RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { const MachineInstr &UserMI = *UserOp.getParent(); const MCInstrDesc &Desc = UserMI.getDesc(); + if (UserMI.isPHI() || UserMI.isFullCopy()) + return DemandedVLs.lookup(&UserMI); + if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" " use VLMAX\n"); - return std::nullopt; + return DemandedVL::vlmax(); } if (RISCVII::readsPastVL( TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) { LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); - return std::nullopt; + return DemandedVL::vlmax(); } unsigned VLOpNum = RISCVII::getVLOpNum(Desc); @@ -1287,11 +1323,10 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { if (UserOp.isTied()) { assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() && RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); - auto DemandedVL = DemandedVLs.lookup(&UserMI); - if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { + if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) { LLVM_DEBUG(dbgs() << " Abort because user is passthru in " "instruction with demanded tail\n"); - return std::nullopt; + return DemandedVL::vlmax(); } } @@ -1304,18 +1339,16 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { // If we know the demanded VL of UserMI, then we can reduce the VL it // requires. 
- if (auto DemandedVL = DemandedVLs.lookup(&UserMI)) { - assert(isCandidate(UserMI)); - if (RISCV::isVLKnownLE(*DemandedVL, VLOp)) - return DemandedVL; - } + if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) + return DemandedVLs.lookup(&UserMI); return VLOp; } -std::optional -RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { - std::optional CommonVL; +bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { + if (MI.isPHI() || MI.isFullCopy()) + return true; + SmallSetVector Worklist; SmallPtrSet PHISeen; for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) @@ -1343,23 +1376,9 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { continue; } - auto VLOp = getMinimumVLForUser(UserOp); - if (!VLOp) - return std::nullopt; - - // Use the largest VL among all the users. If we cannot determine this - // statically, then we cannot optimize the VL. - if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) { - CommonVL = *VLOp; - LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n"); - } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) { - LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n"); - return std::nullopt; - } - if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) { LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n"); - return std::nullopt; + return false; } std::optional ConsumerInfo = getOperandInfo(UserOp, MRI); @@ -1369,7 +1388,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n"); LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n"); LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n"); - return std::nullopt; + return false; } // If the operand is used as a scalar operand, then the EEW must be @@ -1384,11 +1403,11 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { << " Abort due to incompatible information for EMUL or EEW.\n"); LLVM_DEBUG(dbgs() << " ConsumerInfo is: 
" << ConsumerInfo << "\n"); LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n"); - return std::nullopt; + return false; } } - return CommonVL; + return true; } bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { @@ -1404,9 +1423,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return false; } - auto CommonVL = DemandedVLs.lookup(&MI); - if (!CommonVL) - return false; + auto *CommonVL = &DemandedVLs.at(&MI).VL; assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) && "Expected VL to be an Imm or virtual Reg"); @@ -1441,6 +1458,24 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return true; } +static bool isPhysical(const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); +} + +/// Look through \p MI's operands and propagate what it demands to its uses. +void RISCVVLOptimizer::transfer(const MachineInstr &MI) { + if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical)) + DemandedVLs[&MI] = DemandedVL::vlmax(); + + for (const MachineOperand &MO : vector_uses(MI)) { + const MachineInstr *Def = MRI->getVRegDef(MO.getReg()); + DemandedVL Prev = DemandedVLs[Def]; + DemandedVLs[Def] = max(DemandedVLs[Def], getMinimumVLForUser(MO)); + if (DemandedVLs[Def] != Prev) + Worklist.insert(Def); + } +} + bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -1457,14 +1492,17 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { assert(DemandedVLs.empty()); // For each instruction that defines a vector, compute what VL its - // downstream users demand. + // upstream uses demand. 
for (MachineBasicBlock *MBB : post_order(&MF)) { assert(MDT->isReachableFromEntry(MBB)); - for (MachineInstr &MI : reverse(*MBB)) { - if (!isCandidate(MI)) - continue; - DemandedVLs.insert({&MI, checkUsers(MI)}); - } + for (MachineInstr &MI : reverse(*MBB)) + Worklist.insert(&MI); + } + + while (!Worklist.empty()) { + const MachineInstr *MI = Worklist.front(); + Worklist.remove(MI); + transfer(*MI); } // Then go through and see if we can reduce the VL of any instructions to diff --git a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll index cca00bf58063d..2d64defe8c7b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll +++ b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll @@ -6,7 +6,7 @@ target triple = "riscv64-unknown-linux-gnu" define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wide.load, %0, %1, %2, %3) #0 { ; CHECK-LABEL: _ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v10, 0 @@ -14,7 +14,7 @@ define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wi ; CHECK-NEXT: vmv.v.i v14, 0 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: vmv2r.v v16, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 8651eaad4ef28..ecea4efa4e768 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -202,7 +202,7 @@ define void @fadd_fcmp_select_copy( %v, %c define void @recurrence( %v, ptr %p, iXLen %n, iXLen %vl) { ; CHECK-LABEL: recurrence: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, zero, e32, m2, 
ta, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: .LBB13_1: # %loop ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -210,7 +210,6 @@ define void @recurrence( %v, ptr %p, iXLen %n, iXLen %vl) { ; CHECK-NEXT: vadd.vv v10, v10, v8 ; CHECK-NEXT: bnez a1, .LBB13_1 ; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vse32.v v10, (a0) ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 538b68a70908b..2fcb8da339b06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -613,14 +613,14 @@ body: | ; CHECK-NEXT: liveins: $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 - ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl, 3 /* e8 */, 3 /* ta, ma */ ; CHECK-NEXT: PseudoBR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %phi:vr = PHI %start, %bb.0, %inc, %bb.1 - ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, -1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl, 3 /* e8 */, 3 /* ta, ma */ ; CHECK-NEXT: BNE $noreg, $noreg, %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: diff --git a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll index 4b9f9a0579c48..3a05477e64ccd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll @@ -11,7 +11,7 @@ ; which was responsible for speeding it up. define @same_vl_imm( %passthru, %a, %b) { - ; CHECK: User VL is: 4 + ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2 ; CHECK: Abort due to CommonVL == VLOp, no point in reducing. 
%v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, i64 4) %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, i64 4) @@ -19,7 +19,7 @@ define @same_vl_imm( %passthru, @same_vl_reg( %passthru, %a, %b, i64 %vl) { - ; CHECK: User VL is: %3:gprnox0 + ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2 ; CHECK: Abort due to CommonVL == VLOp, no point in reducing. %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, i64 %vl) %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, i64 %vl) From 540f6f96fe79f3653262b91daa8915a9322e3e95 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 30 Jul 2025 16:07:00 +0800 Subject: [PATCH 4/4] Link to talk in header comment --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 40af9b04c97b6..a86311fb57836 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -10,9 +10,19 @@ // instructions are inserted. // // The purpose of this optimization is to make the VL argument, for instructions -// that have a VL argument, as small as possible. This is implemented by -// visiting each instruction in reverse order and checking that if it has a VL -// argument, whether the VL can be reduced. +// that have a VL argument, as small as possible. +// +// This is split into a sparse dataflow analysis where we determine what VL is +// demanded by each instruction first, and then afterwards try to reduce the VL +// of each instruction if it demands less than its VL operand. 
+// +// The analysis is explained in more detail in the 2025 EuroLLVM Developers' +// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL +// Optimizer", which is available on YouTube at +// https://www.youtube.com/watch?v=Mfb5fRSdJAc +// +// The slides for the talk are available at +// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf // //===---------------------------------------------------------------------===//