22
22
#include " llvm/ADT/STLExtras.h"
23
23
#include " llvm/ADT/SmallSet.h"
24
24
#include " llvm/ADT/SmallVector.h"
25
+ #include " llvm/ADT/Statistic.h"
25
26
#include " llvm/ADT/iterator_range.h"
26
27
#include " llvm/CodeGen/CFIInstBuilder.h"
27
28
#include " llvm/CodeGen/LivePhysRegs.h"
@@ -86,6 +87,11 @@ static cl::opt<unsigned>
86
87
BDisplacementBits (" aarch64-b-offset-bits" , cl::Hidden, cl::init(26 ),
87
88
cl::desc(" Restrict range of B instructions (DEBUG)" ));
88
89
90
+ #define DEBUG_TYPE " aarch64-machine-combine"
91
+ STATISTIC (NumGathersMatched, " Number of `gather`-like patterns matched" );
92
+ STATISTIC (NumGathersDroppedAliasing, " Number of `gather`-like patterns dropped "
93
+ " due to potential pointer aliasing" );
94
+
89
95
AArch64InstrInfo::AArch64InstrInfo (const AArch64Subtarget &STI)
90
96
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
91
97
AArch64::CATCHRET),
@@ -7416,14 +7422,21 @@ static bool getGatherPattern(MachineInstr &Root,
7416
7422
// 1. It has a single non-debug use (since we will be replacing the virtual
7417
7423
// register)
7418
7424
// 2. That the addressing mode only uses a single offset register.
7425
+ // 3. The address operand does not have any users that are a COPY operation to
7426
+ // a physical reg.
7427
+ // This could indicate that it is copied as part of an ABI of a function
7428
+ // call, which means that it may be modified in unexpected ways, see: <link
7429
+ // to github>
7419
7430
auto *CurrInstr = MRI.getUniqueVRegDef (Root.getOperand (1 ).getReg ());
7420
7431
auto Range = llvm::seq<unsigned >(1 , NumLanes - 1 );
7421
- SmallSet<unsigned , 4 > RemainingLanes (Range.begin (), Range.end ());
7432
+ SmallSet<unsigned , 16 > RemainingLanes (Range.begin (), Range.end ());
7433
+ SmallSet<const MachineInstr *, 16 > LoadInstrs = {};
7422
7434
while (!RemainingLanes.empty () && CurrInstr &&
7423
7435
CurrInstr->getOpcode () == LoadLaneOpCode &&
7424
7436
MRI.hasOneNonDBGUse (CurrInstr->getOperand (0 ).getReg ()) &&
7425
7437
CurrInstr->getNumOperands () == 4 ) {
7426
7438
RemainingLanes.erase (CurrInstr->getOperand (2 ).getImm ());
7439
+ LoadInstrs.insert (CurrInstr);
7427
7440
CurrInstr = MRI.getUniqueVRegDef (CurrInstr->getOperand (1 ).getReg ());
7428
7441
}
7429
7442
@@ -7444,6 +7457,15 @@ static bool getGatherPattern(MachineInstr &Root,
7444
7457
if (!MRI.hasOneNonDBGUse (Lane0LoadReg))
7445
7458
return false ;
7446
7459
7460
+ LoadInstrs.insert (MRI.getUniqueVRegDef (Lane0LoadReg));
7461
+
7462
+ // Conservatively check that every pair of loads is provably disjoint in
+ // memory; if any pair might alias, bail out of the combine.
+ // NOTE(review): the inner-loop init below (`auto LoadB = ++LoadA`) mutates
+ // the OUTER iterator — it compares an element against itself and can
+ // advance LoadA past end(); this should be `std::next(LoadA)`.
7463
+ const TargetInstrInfo *TII = MF->getSubtarget ().getInstrInfo ();
7464
+ for (auto LoadA = LoadInstrs.begin (); LoadA != LoadInstrs.end (); ++LoadA)
7465
+ for (auto LoadB = ++LoadA; LoadB != LoadInstrs.end (); ++LoadB)
7466
+ if (!TII->areMemAccessesTriviallyDisjoint (**LoadA, **LoadB))
7467
+ return false ;
7468
+
7447
7469
switch (NumLanes) {
7448
7470
case 4 :
7449
7471
Patterns.push_back (AArch64MachineCombinerPattern::GATHER_LANE_i32);
0 commit comments