[MachineScheduler] Make cluster check more efficient #150884

ruiling · 2025-07-28T06:10:42Z

No description provided.

llvmbot · 2025-07-28T06:11:15Z

@llvm/pr-subscribers-backend-powerpc

@llvm/pr-subscribers-backend-amdgpu

Author: Ruiling, Song (ruiling)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/150884.diff

5 Files Affected:

(modified) llvm/include/llvm/CodeGen/MachineScheduler.h (+4-4)
(modified) llvm/include/llvm/CodeGen/ScheduleDAG.h (+5)
(modified) llvm/lib/CodeGen/MachineScheduler.cpp (+28-18)
(modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+14-8)
(modified) llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp (+16-8)

diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index efda7eb8ffc8d..5a2aee2fa7643 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1303,8 +1303,8 @@ class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
   SchedBoundary Top;
   SchedBoundary Bot;
 
-  ClusterInfo *TopCluster;
-  ClusterInfo *BotCluster;
+  unsigned TopClusterID;
+  unsigned BotClusterID;
 
   /// Candidate last picked from Top boundary.
   SchedCandidate TopCand;
@@ -1346,8 +1346,8 @@ class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
   /// Candidate last picked from Bot boundary.
   SchedCandidate BotCand;
 
-  ClusterInfo *TopCluster;
-  ClusterInfo *BotCluster;
+  unsigned TopClusterID;
+  unsigned BotClusterID;
 
 public:
   PostGenericScheduler(const MachineSchedContext *C)
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 3a0a31b1930b0..122b7be96b46a 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -240,6 +240,11 @@ class TargetRegisterInfo;
   typedef SmallSet<SUnit *, 8> ClusterInfo;
   constexpr unsigned InvalidClusterId = ~0u;
 
+  /// Return whether the input cluster ID's are the same and valid.
+  inline bool isTheSameCluster(unsigned A, unsigned B) {
+    return A != InvalidClusterId && A == B;
+  }
+
   /// Scheduling unit. This is a node in the scheduling DAG.
   class SUnit {
   private:
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 9d5c39ce7ae76..454eb4b32b338 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
   TopCand.SU = nullptr;
   BotCand.SU = nullptr;
 
-  TopCluster = nullptr;
-  BotCluster = nullptr;
+  TopClusterID = InvalidClusterId;
+  BotClusterID = InvalidClusterId;
 }
 
 /// Initialize the per-region scheduling policy.
@@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
   // This is a best effort to set things up for a post-RA pass. Optimizations
   // like generating loads of multiple registers should ideally be done within
   // the scheduler pass by combining the loads during DAG postprocessing.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;
 
@@ -4251,8 +4255,9 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
-    TopCluster = DAG->getCluster(SU->ParentClusterIdx);
-    LLVM_DEBUG(if (TopCluster) {
+    TopClusterID = SU->ParentClusterIdx;
+    LLVM_DEBUG(if (TopClusterID != InvalidClusterId) {
+      ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
       dbgs() << "  Top Cluster: ";
       for (auto *N : *TopCluster)
         dbgs() << N->NodeNum << '\t';
@@ -4263,8 +4268,9 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
       reschedulePhysReg(SU, true);
   } else {
     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
-    BotCluster = DAG->getCluster(SU->ParentClusterIdx);
-    LLVM_DEBUG(if (BotCluster) {
+    BotClusterID = SU->ParentClusterIdx;
+    LLVM_DEBUG(if (BotClusterID != InvalidClusterId) {
+      ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
       dbgs() << "  Bot Cluster: ";
       for (auto *N : *BotCluster)
         dbgs() << N->NodeNum << '\t';
@@ -4306,8 +4312,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
   if (!Bot.HazardRec) {
     Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
   }
-  TopCluster = nullptr;
-  BotCluster = nullptr;
+  TopClusterID = InvalidClusterId;
+  BotClusterID = InvalidClusterId;
 }
 
 void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
@@ -4373,10 +4379,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
     return TryCand.Reason != NoCand;
 
   // Keep clustered nodes together.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;
   // Avoid critical resource consumption and balance the schedule.
@@ -4575,11 +4585,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
-    TopCluster = DAG->getCluster(SU->ParentClusterIdx);
+    TopClusterID = SU->ParentClusterIdx;
     Top.bumpNode(SU);
   } else {
     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
-    BotCluster = DAG->getCluster(SU->ParentClusterIdx);
+    BotClusterID = SU->ParentClusterIdx;
     Bot.bumpNode(SU);
   }
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..b7622d143dabd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -592,10 +592,13 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
   // This is a best effort to set things up for a post-RA pass. Optimizations
   // like generating loads of multiple registers should ideally be done within
   // the scheduler pass by combining the loads during DAG postprocessing.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;
 
@@ -666,10 +669,13 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
 
   // MaxMemoryClause-specific: We prioritize clustered instructions as we would
   // get more benefit from clausing these memory instructions.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;
 
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5eb1f0128643d..b7e2263424863 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -100,10 +100,14 @@ bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
   // This is a best effort to set things up for a post-RA pass. Optimizations
   // like generating loads of multiple registers should ideally be done within
   // the scheduler pass by combining the loads during DAG postprocessing.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;
 
@@ -189,10 +193,14 @@ bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
     return TryCand.Reason != NoCand;
 
   // Keep clustered nodes together.
-  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
-  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
-  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
-                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+  bool CandIsClusterSucc =
+      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+  bool TryCandIsClusterSucc =
+      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                  Cluster))
     return TryCand.Reason != NoCand;

wangpc-pp

Can you confirm the compile-time gain?

ruiling · 2025-07-30T02:30:23Z

Can you confirm the compile-time gain?

I don't see the compile-time issue on my side. I am not sure whether @mysterymath can help on this as he observed the issue. But I think the change would help reduce compile time as it stops calling SmallSet::contains().

llvm/lib/CodeGen/MachineScheduler.cpp

wangpc-pp

LGTM.

mysterymath · 2025-07-30T18:28:35Z

Can you confirm the compile-time gain?

I don't see the compile-time issue on my side. I am not sure whether @mysterymath can help on this as he observed the issue. But I think the change would help reduce compile time as it stops calling SmallSet::contains().

It seems this wasn't the root cause of our regression, since IIRC went away in the meantime.

[MachineScheduler] Make cluster check more efficient

477152b

ruiling requested review from jayfoad, arsenm and wangpc-pp July 28, 2025 06:10

llvmbot added backend:AMDGPU backend:PowerPC llvm:codegen labels Jul 28, 2025

ruiling mentioned this pull request Jul 28, 2025

MachineScheduler: Improve instruction clustering #137784

Merged

wangpc-pp reviewed Jul 28, 2025

View reviewed changes

arsenm approved these changes Jul 30, 2025

View reviewed changes

llvm/lib/CodeGen/MachineScheduler.cpp Outdated Show resolved Hide resolved

wangpc-pp approved these changes Jul 30, 2025

View reviewed changes

Format the code in LLVM_DEBUG

8213152

ruiling merged commit 451912a into llvm:main Aug 1, 2025
9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[MachineScheduler] Make cluster check more efficient #150884

[MachineScheduler] Make cluster check more efficient #150884

Uh oh!

ruiling commented Jul 28, 2025

Uh oh!

llvmbot commented Jul 28, 2025 •

edited

Loading

Uh oh!

wangpc-pp left a comment

Uh oh!

ruiling commented Jul 30, 2025

Uh oh!

Uh oh!

wangpc-pp left a comment

Uh oh!

mysterymath commented Jul 30, 2025

Uh oh!

Uh oh!

Uh oh!

[MachineScheduler] Make cluster check more efficient #150884

[MachineScheduler] Make cluster check more efficient #150884

Uh oh!

Conversation

ruiling commented Jul 28, 2025

Uh oh!

llvmbot commented Jul 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

wangpc-pp left a comment

Choose a reason for hiding this comment

Uh oh!

ruiling commented Jul 30, 2025

Uh oh!

Uh oh!

wangpc-pp left a comment

Choose a reason for hiding this comment

Uh oh!

mysterymath commented Jul 30, 2025

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Jul 28, 2025 •

edited

Loading