Skip to content

Commit d4bd7c9

Browse files
committed
[AArch64][MachineCombiner] Fix setting reg state for gather lane pattern
1 parent adb217d commit d4bd7c9

File tree

2 files changed

+34
-22
lines changed

2 files changed

+34
-22
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/STLExtras.h"
2323
#include "llvm/ADT/SmallSet.h"
2424
#include "llvm/ADT/SmallVector.h"
25+
#include "llvm/ADT/iterator_range.h"
2526
#include "llvm/CodeGen/CFIInstBuilder.h"
2627
#include "llvm/CodeGen/LivePhysRegs.h"
2728
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -7514,22 +7515,24 @@ generateGatherPattern(MachineInstr &Root,
75147515

75157516
auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
75167517
Register SrcRegister, unsigned Lane,
7517-
Register OffsetRegister) {
7518+
Register OffsetRegister,
7519+
bool OffsetRegisterKillState) {
75187520
auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
75197521
MachineInstrBuilder LoadIndexIntoRegister =
75207522
BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
75217523
NewRegister)
75227524
.addReg(SrcRegister)
75237525
.addImm(Lane)
7524-
.addReg(OffsetRegister, getKillRegState(true));
7526+
.addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
75257527
InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
75267528
InsInstrs.push_back(LoadIndexIntoRegister);
75277529
return NewRegister;
75287530
};
75297531

75307532
// Helper to create the load instruction, selecting the opcode from NumLanes
75317533
auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7532-
Register OffsetReg) -> MachineInstrBuilder {
7534+
Register OffsetReg,
7535+
bool KillState) -> MachineInstrBuilder {
75337536
unsigned Opcode;
75347537
switch (NumLanes) {
75357538
case 4:
@@ -7555,33 +7558,38 @@ generateGatherPattern(MachineInstr &Root,
75557558
auto LanesToLoadToReg0 =
75567559
llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
75577560
LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7558-
auto PrevReg = SubregToReg->getOperand(0).getReg();
7561+
Register PrevReg = SubregToReg->getOperand(0).getReg();
75597562
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7563+
const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
75607564
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
7561-
LoadInstr->getOperand(3).getReg());
7565+
OffsetRegOperand.getReg(),
7566+
OffsetRegOperand.isKill());
75627567
DelInstrs.push_back(LoadInstr);
75637568
}
7564-
auto LastLoadReg0 = PrevReg;
7569+
Register LastLoadReg0 = PrevReg;
75657570

75667571
// First load into register 1. Perform an LDRSui to zero out the upper lanes in
75677572
// a single instruction.
7568-
auto Lane0Load = *LoadToLaneInstrsAscending.begin();
7569-
auto OriginalSplitLoad =
7573+
MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7574+
MachineInstr *OriginalSplitLoad =
75707575
*std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7571-
auto DestRegForMiddleIndex = MRI.createVirtualRegister(
7576+
Register DestRegForMiddleIndex = MRI.createVirtualRegister(
75727577
MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
75737578

7579+
const MachineOperand &OriginalSplitToLoadOffsetOperand =
7580+
OriginalSplitLoad->getOperand(3);
75747581
MachineInstrBuilder MiddleIndexLoadInstr =
75757582
CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
7576-
OriginalSplitLoad->getOperand(3).getReg());
7583+
OriginalSplitToLoadOffsetOperand.getReg(),
7584+
OriginalSplitToLoadOffsetOperand.isKill());
75777585

75787586
InstrIdxForVirtReg.insert(
75797587
std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
75807588
InsInstrs.push_back(MiddleIndexLoadInstr);
75817589
DelInstrs.push_back(OriginalSplitLoad);
75827590

75837591
// Subreg To Reg instruction for register 1.
7584-
auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7592+
Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
75857593
unsigned SubregType;
75867594
switch (NumLanes) {
75877595
case 4:
@@ -7614,14 +7622,18 @@ generateGatherPattern(MachineInstr &Root,
76147622
LoadToLaneInstrsAscending.end());
76157623
PrevReg = SubRegToRegInstr->getOperand(0).getReg();
76167624
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7625+
const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
76177626
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
7618-
LoadInstr->getOperand(3).getReg());
7627+
OffsetRegOperand.getReg(),
7628+
OffsetRegOperand.isKill());
7629+
7630+
// Do not add the last load instruction to DelInstrs - it will be removed later.
76197631
if (Index == NumLanes / 2 - 2) {
76207632
break;
76217633
}
76227634
DelInstrs.push_back(LoadInstr);
76237635
}
7624-
auto LastLoadReg1 = PrevReg;
7636+
Register LastLoadReg1 = PrevReg;
76257637

76267638
// Create the final zip instruction to combine the results.
76277639
MachineInstrBuilder ZipInstr =

llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ body: |
1313
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
1414
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
1515
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
16-
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
17-
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
18-
; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
16+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 1
17+
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, [[LD_i32]], %subreg.ssub
18+
; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, [[COPY2]]
1919
; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0
2020
; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
21-
; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
21+
; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, [[COPY4]]
2222
; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
2323
; CHECK-NEXT: $q0 = COPY [[ZIP]]
2424
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -27,11 +27,11 @@ body: |
2727
%2:gpr64common = COPY $x2
2828
%3:gpr64common = COPY $x3
2929
%4:gpr64common = COPY $x4
30-
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
31-
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
32-
%7:fpr128 = LD1i32 %6, 1, killed %2
33-
%8:fpr128 = LD1i32 %7, 2, killed %3
34-
%9:fpr128 = LD1i32 %8, 3, killed %4
30+
%5:fpr32 = LDRSroX %0, %1, 0, 1
31+
%6:fpr128 = SUBREG_TO_REG 0, %5, %subreg.ssub
32+
%7:fpr128 = LD1i32 %6, 1, %2
33+
%8:fpr128 = LD1i32 %7, 2, %3
34+
%9:fpr128 = LD1i32 %8, 3, %4
3535
$q0 = COPY %9
3636
RET_ReallyLR implicit $q0
3737

0 commit comments

Comments
 (0)