22
22
#include " llvm/ADT/STLExtras.h"
23
23
#include " llvm/ADT/SmallSet.h"
24
24
#include " llvm/ADT/SmallVector.h"
25
+ #include " llvm/ADT/iterator_range.h"
25
26
#include " llvm/CodeGen/CFIInstBuilder.h"
26
27
#include " llvm/CodeGen/LivePhysRegs.h"
27
28
#include " llvm/CodeGen/MachineBasicBlock.h"
@@ -7514,22 +7515,24 @@ generateGatherPattern(MachineInstr &Root,
7514
7515
7515
7516
auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
7516
7517
Register SrcRegister, unsigned Lane,
7517
- Register OffsetRegister) {
7518
+ Register OffsetRegister,
7519
+ bool OffsetRegisterKillState) {
7518
7520
auto NewRegister = MRI.createVirtualRegister (FPR128RegClass);
7519
7521
MachineInstrBuilder LoadIndexIntoRegister =
7520
7522
BuildMI (MF, MIMetadata (*OriginalInstr), TII->get (Root.getOpcode ()),
7521
7523
NewRegister)
7522
7524
.addReg (SrcRegister)
7523
7525
.addImm (Lane)
7524
- .addReg (OffsetRegister, getKillRegState (true ));
7526
+ .addReg (OffsetRegister, getKillRegState (OffsetRegisterKillState ));
7525
7527
InstrIdxForVirtReg.insert (std::make_pair (NewRegister, InsInstrs.size ()));
7526
7528
InsInstrs.push_back (LoadIndexIntoRegister);
7527
7529
return NewRegister;
7528
7530
};
7529
7531
7530
7532
// Helper to create load instruction based on opcode
7531
7533
auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7532
- Register OffsetReg) -> MachineInstrBuilder {
7534
+ Register OffsetReg,
7535
+ bool KillState) -> MachineInstrBuilder {
7533
7536
unsigned Opcode;
7534
7537
switch (NumLanes) {
7535
7538
case 4 :
@@ -7555,33 +7558,38 @@ generateGatherPattern(MachineInstr &Root,
7555
7558
auto LanesToLoadToReg0 =
7556
7559
llvm::make_range (LoadToLaneInstrsAscending.begin () + 1 ,
7557
7560
LoadToLaneInstrsAscending.begin () + NumLanes / 2 );
7558
- auto PrevReg = SubregToReg->getOperand (0 ).getReg ();
7561
+ Register PrevReg = SubregToReg->getOperand (0 ).getReg ();
7559
7562
for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg0)) {
7563
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand (3 );
7560
7564
PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7561
- LoadInstr->getOperand (3 ).getReg ());
7565
+ OffsetRegOperand.getReg (),
7566
+ OffsetRegOperand.isKill ());
7562
7567
DelInstrs.push_back (LoadInstr);
7563
7568
}
7564
- auto LastLoadReg0 = PrevReg;
7569
+ Register LastLoadReg0 = PrevReg;
7565
7570
7566
7571
// First load into register 1. Perform a LDRSui to zero out the upper lanes in
7567
7572
// a single instruction.
7568
- auto Lane0Load = *LoadToLaneInstrsAscending.begin ();
7569
- auto OriginalSplitLoad =
7573
+ MachineInstr * Lane0Load = *LoadToLaneInstrsAscending.begin ();
7574
+ MachineInstr * OriginalSplitLoad =
7570
7575
*std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
7571
- auto DestRegForMiddleIndex = MRI.createVirtualRegister (
7576
+ Register DestRegForMiddleIndex = MRI.createVirtualRegister (
7572
7577
MRI.getRegClass (Lane0Load->getOperand (0 ).getReg ()));
7573
7578
7579
+ const MachineOperand &OriginalSplitToLoadOffsetOperand =
7580
+ OriginalSplitLoad->getOperand (3 );
7574
7581
MachineInstrBuilder MiddleIndexLoadInstr =
7575
7582
CreateLoadInstruction (NumLanes, DestRegForMiddleIndex,
7576
- OriginalSplitLoad->getOperand (3 ).getReg ());
7583
+ OriginalSplitToLoadOffsetOperand.getReg (),
7584
+ OriginalSplitToLoadOffsetOperand.isKill ());
7577
7585
7578
7586
InstrIdxForVirtReg.insert (
7579
7587
std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
7580
7588
InsInstrs.push_back (MiddleIndexLoadInstr);
7581
7589
DelInstrs.push_back (OriginalSplitLoad);
7582
7590
7583
7591
// Subreg To Reg instruction for register 1.
7584
- auto DestRegForSubregToReg = MRI.createVirtualRegister (FPR128RegClass);
7592
+ Register DestRegForSubregToReg = MRI.createVirtualRegister (FPR128RegClass);
7585
7593
unsigned SubregType;
7586
7594
switch (NumLanes) {
7587
7595
case 4 :
@@ -7614,14 +7622,18 @@ generateGatherPattern(MachineInstr &Root,
7614
7622
LoadToLaneInstrsAscending.end ());
7615
7623
PrevReg = SubRegToRegInstr->getOperand (0 ).getReg ();
7616
7624
for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg1)) {
7625
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand (3 );
7617
7626
PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7618
- LoadInstr->getOperand (3 ).getReg ());
7627
+ OffsetRegOperand.getReg (),
7628
+ OffsetRegOperand.isKill ());
7629
+
7630
+ // Do not add the last load instruction to DelInstrs - it will be removed later.
7619
7631
if (Index == NumLanes / 2 - 2 ) {
7620
7632
break ;
7621
7633
}
7622
7634
DelInstrs.push_back (LoadInstr);
7623
7635
}
7624
- auto LastLoadReg1 = PrevReg;
7636
+ Register LastLoadReg1 = PrevReg;
7625
7637
7626
7638
// Create the final zip instruction to combine the results.
7627
7639
MachineInstrBuilder ZipInstr =
0 commit comments