Skip to content

[MachineScheduler] Make cluster check more efficient #150884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/include/llvm/CodeGen/MachineScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1303,8 +1303,8 @@ class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
SchedBoundary Top;
SchedBoundary Bot;

ClusterInfo *TopCluster;
ClusterInfo *BotCluster;
unsigned TopClusterID;
unsigned BotClusterID;

/// Candidate last picked from Top boundary.
SchedCandidate TopCand;
Expand Down Expand Up @@ -1346,8 +1346,8 @@ class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
/// Candidate last picked from Bot boundary.
SchedCandidate BotCand;

ClusterInfo *TopCluster;
ClusterInfo *BotCluster;
unsigned TopClusterID;
unsigned BotClusterID;

public:
PostGenericScheduler(const MachineSchedContext *C)
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/ScheduleDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ class TargetRegisterInfo;
typedef SmallSet<SUnit *, 8> ClusterInfo;
constexpr unsigned InvalidClusterId = ~0u;

/// Return whether the input cluster ID's are the same and valid.
inline bool isTheSameCluster(unsigned A, unsigned B) {
return A != InvalidClusterId && A == B;
}

/// Scheduling unit. This is a node in the scheduling DAG.
class SUnit {
private:
Expand Down
66 changes: 40 additions & 26 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
TopCand.SU = nullptr;
BotCand.SU = nullptr;

TopCluster = nullptr;
BotCluster = nullptr;
TopClusterID = InvalidClusterId;
BotClusterID = InvalidClusterId;
}

/// Initialize the per-region scheduling policy.
Expand Down Expand Up @@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;

Expand Down Expand Up @@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
TopCluster = DAG->getCluster(SU->ParentClusterIdx);
LLVM_DEBUG(if (TopCluster) {
dbgs() << " Top Cluster: ";
for (auto *N : *TopCluster)
dbgs() << N->NodeNum << '\t';
dbgs() << '\n';
TopClusterID = SU->ParentClusterIdx;
LLVM_DEBUG({
if (TopClusterID != InvalidClusterId) {
ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
dbgs() << " Top Cluster: ";
for (auto *N : *TopCluster)
dbgs() << N->NodeNum << '\t';
dbgs() << '\n';
}
});
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
BotCluster = DAG->getCluster(SU->ParentClusterIdx);
LLVM_DEBUG(if (BotCluster) {
dbgs() << " Bot Cluster: ";
for (auto *N : *BotCluster)
dbgs() << N->NodeNum << '\t';
dbgs() << '\n';
BotClusterID = SU->ParentClusterIdx;
LLVM_DEBUG({
if (BotClusterID != InvalidClusterId) {
ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
dbgs() << " Bot Cluster: ";
for (auto *N : *BotCluster)
dbgs() << N->NodeNum << '\t';
dbgs() << '\n';
}
});
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
Expand Down Expand Up @@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
if (!Bot.HazardRec) {
Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
TopCluster = nullptr;
BotCluster = nullptr;
TopClusterID = InvalidClusterId;
BotClusterID = InvalidClusterId;
}

void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
Expand Down Expand Up @@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;

// Keep clustered nodes together.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
Expand Down Expand Up @@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
TopCluster = DAG->getCluster(SU->ParentClusterIdx);
TopClusterID = SU->ParentClusterIdx;
Top.bumpNode(SU);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
BotCluster = DAG->getCluster(SU->ParentClusterIdx);
BotClusterID = SU->ParentClusterIdx;
Bot.bumpNode(SU);
}
}
Expand Down
22 changes: 14 additions & 8 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,10 +592,13 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;

Expand Down Expand Up @@ -666,10 +669,13 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,

// MaxMemoryClause-specific: We prioritize clustered instructions as we would
// get more benefit from clausing these memory instructions.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;

Expand Down
24 changes: 16 additions & 8 deletions llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,14 @@ bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;

Expand Down Expand Up @@ -189,10 +193,14 @@ bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;

// Keep clustered nodes together.
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
bool CandIsClusterSucc =
isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;

Expand Down