Skip to content

Commit 76289e1

Browse files
committed
[AArch64] [MachineCombiner] Verify load instructions in gather pattern cannot possibly overlap
1 parent d4bd7c9 commit 76289e1

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/STLExtras.h"
2323
#include "llvm/ADT/SmallSet.h"
2424
#include "llvm/ADT/SmallVector.h"
25+
#include "llvm/ADT/Statistic.h"
2526
#include "llvm/ADT/iterator_range.h"
2627
#include "llvm/CodeGen/CFIInstBuilder.h"
2728
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -86,6 +87,11 @@ static cl::opt<unsigned>
8687
BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
8788
cl::desc("Restrict range of B instructions (DEBUG)"));
8889

90+
#define DEBUG_TYPE "aarch64-machine-combine"
91+
STATISTIC(NumGathersMatched, "Number of `gather`-like patterns matched");
92+
STATISTIC(NumGathersDroppedAliasing, "Number of `gather`-like patterns dropped "
93+
"due to potential pointer aliasing");
94+
8995
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
9096
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
9197
AArch64::CATCHRET),
@@ -7416,14 +7422,21 @@ static bool getGatherPattern(MachineInstr &Root,
74167422
// 1. It has a single non-debug use (since we will be replacing the virtual
74177423
// register)
74187424
// 2. That the addressing mode only uses a single offset register.
7425+
// 3. The address operand does not have any users that are a COPY operation to
7426+
// a physical reg.
7427+
// This could indicate that it is copied as part of an ABI of a function
7428+
// call, which means that it may be modified in unexpected ways, see: <link
7429+
// to github>
74197430
auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
74207431
auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7421-
SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end());
7432+
SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7433+
SmallSet<const MachineInstr *, 16> LoadInstrs = {};
74227434
while (!RemainingLanes.empty() && CurrInstr &&
74237435
CurrInstr->getOpcode() == LoadLaneOpCode &&
74247436
MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
74257437
CurrInstr->getNumOperands() == 4) {
74267438
RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7439+
LoadInstrs.insert(CurrInstr);
74277440
CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
74287441
}
74297442

@@ -7444,6 +7457,15 @@ static bool getGatherPattern(MachineInstr &Root,
74447457
if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
74457458
return false;
74467459

7460+
LoadInstrs.insert(MRI.getUniqueVRegDef(Lane0LoadReg));
7461+
7462+
// Conservative check that we can
7463+
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
7464+
for (auto LoadA = LoadInstrs.begin(); LoadA != LoadInstrs.end(); ++LoadA)
7465+
for (auto LoadB = ++LoadA; LoadB != LoadInstrs.end(); ++LoadB)
7466+
if (!TII->areMemAccessesTriviallyDisjoint(**LoadA, **LoadB))
7467+
return false;
7468+
74477469
switch (NumLanes) {
74487470
case 4:
74497471
Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# RUN: llc -run-pass=machine-combiner -mcpu=neoverse-n2 -mtriple=aarch64-none-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s
2+
3+
4+
--- |
5+
@external_func = external global i32
6+
define void @negative_pattern_offset_reg_copied_to_physical(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4) {
7+
entry:
8+
ret void
9+
}
10+
...
11+
---
12+
name: negative_pattern_offset_reg_copied_to_physical
13+
body: |
14+
bb.0.entry:
15+
liveins: $x0, $x1, $x2, $x3
16+
17+
; CHECK-LABEL: name: negative_pattern_offset_reg_copied_to_physical
18+
; CHECK: [[BASE_REG:%[0-9]+]]:gpr64common = COPY $x0
19+
; CHECK-NEXT: [[PTR_1:%[0-9]+]]:gpr64common = COPY $x1
20+
; CHECK-NEXT: [[PTR_2:%[0-9]+]]:gpr64common = COPY $x2
21+
; CHECK-NEXT: [[PTR_3:%[0-9]+]]:gpr64common = COPY $x3
22+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[BASE_REG]], killed [[PTR_1]], 0, 1
23+
; CHECK-NEXT: [[LD_LANE_0:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
24+
; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_0]], 1, [[PTR_2]]
25+
; CHECK-NEXT: $x0 = COPY [[PTR_2]]
26+
; CHECK-NEXT: BL @external_func, csr_aarch64_aapcs, implicit-def $lr, implicit $x0, implicit-def $x0
27+
; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[PTR_2]]
28+
; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[PTR_3]]
29+
; CHECK-NEXT: [[RESULT:%[0-9]+]]:gpr64common = COPY $x0
30+
; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
31+
; CHECK-NEXT: RET_ReallyLR implicit $q0
32+
%0:gpr64common = COPY $x0
33+
%1:gpr64common = COPY $x1
34+
%2:gpr64common = COPY $x2
35+
%3:gpr64common = COPY $x3
36+
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
37+
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
38+
%7:fpr128 = LD1i32 %6, 1, %2
39+
$x0 = COPY %2
40+
BL @external_func, csr_aarch64_aapcs, implicit-def $lr, implicit $x0, implicit-def $x0
41+
%8:fpr128 = LD1i32 %7, 2, killed %2
42+
%9:fpr128 = LD1i32 %8, 3, killed %3
43+
%10:gpr64common = COPY $x0
44+
$q0 = COPY %9
45+
RET_ReallyLR implicit $q0
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# RUN: llc -run-pass=machine-combiner -mcpu=neoverse-n2 -mtriple=aarch64-none-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s
2+
3+
--- |
4+
define void @aliasing_store_between_vector_loads(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
5+
entry:
6+
ret void
7+
}
8+
9+
...
10+
---
11+
name: aliasing_store_between_vector_loads
12+
alignment: 4
13+
tracksRegLiveness: true
14+
body: |
15+
bb.0.entry:
16+
liveins: $x0, $x1, $x2, $x3
17+
18+
; CHECK-LABEL: name: aliasing_store_between_vector_loads
19+
; CHECK: [[BASE_PTR:%[0-9]+]]:gpr64common = COPY $x0
20+
; CHECK-NEXT: [[OFFSET_PTR:%[0-9]+]]:gpr64common = COPY $x1
21+
; CHECK-NEXT: [[ALIAS_ADDR:%[0-9]+]]:gpr64common = COPY $x2
22+
; CHECK-NEXT: [[OTHER_ADDR:%[0-9]+]]:gpr64common = COPY $x3
23+
; CHECK-NEXT: [[VEC1:%[0-9]+]]:fpr128 = LD1i32 %{{[0-9]+}}, 1, [[ALIAS_ADDR]]
24+
; CHECK-NEXT: [[CONST:%[0-9]+]]:gpr32 = MOVi32 99
25+
; CHECK-NEXT: STRWui [[CONST]], [[ALIAS_ADDR]], 0
26+
; CHECK-NEXT: [[VEC2:%[0-9]+]]:fpr128 = LD1i32 [[VEC1]], 2, killed [[ALIAS_ADDR]]
27+
; CHECK-NEXT: [[VEC3:%[0-9]+]]:fpr128 = LD1i32 [[VEC2]], 3, killed [[OTHER_ADDR]]
28+
; CHECK-NEXT: $q0 = COPY [[VEC3]]
29+
; CHECK-NEXT: RET_ReallyLR implicit $q0
30+
%0:gpr64common = COPY $x0
31+
%1:gpr64common = COPY $x1
32+
%2:gpr64common = COPY $x2
33+
%3:gpr64common = COPY $x3
34+
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
35+
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
36+
%7:fpr128 = LD1i32 %6, 1, %2
37+
%10:gpr32 = MOVi32imm 99
38+
STRWui %10, %2, 0
39+
%8:fpr128 = LD1i32 %7, 2, killed %2
40+
%9:fpr128 = LD1i32 %8, 3, killed %3
41+
$q0 = COPY %9
42+
RET_ReallyLR implicit $q0

0 commit comments

Comments
 (0)