Skip to content

Commit 93faa23

Browse files
committed
[PowerPC] Add Support for indirect calls on AIX.
Extends the descriptor-based indirect call support for 32-bit codegen, and enables indirect calls for AIX. In-depth Description: In a function descriptor based ABI, a function pointer points at a descriptor structure as opposed to the function's entry point. The descriptor takes the form of 3 pointers: 1 for the function's entry point, 1 for the TOC anchor of the module containing the function definition, and 1 for the environment pointer: struct FunctionDescriptor { void *EntryPoint; void *TOCAnchor; void *EnvironmentPointer; }; An indirect call has several steps of loading the information from the descriptor into the proper registers for setting up the call. Namely it has to: 1) Save the caller's TOC pointer into the TOC save slot in the linkage area, and then load the callee's TOC pointer into the TOC register (GPR 2 on AIX). 2) Load the function descriptor's entry point into the count register. 3) Load the environment pointer into the environment pointer register (GPR 11 on AIX). 4) Perform the call by branching on count register. 5) Restore the caller's TOC pointer after returning from the indirect call. A couple of important caveats to the above: - There is no way to directly load a value from memory into the count register. Instead we populate the count register by loading the entry point address into a GPR and then moving the GPR to the count register. - The TOC restore has to come immediately after the branch on count register instruction (i.e., the 1st instruction executed after we return from the call). This is an implementation limitation. We could, in theory, schedule the restore elsewhere as long as no uses of the TOC pointer fall in between the call and the restore; however, to keep it simple, we insert a pseudo instruction that represents both the indirect branch instruction and the load instruction that restores the caller's TOC from the linkage area. 
As they flow through the compiler as a single pseudo instruction, nothing can be inserted between them and the caller's TOC is then valid at any use. Differential Revision: https://reviews.llvm.org/D70724
1 parent cf25224 commit 93faa23

File tree

9 files changed

+290
-35
lines changed

9 files changed

+290
-35
lines changed

llvm/lib/Target/PowerPC/P9InstrResources.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,7 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
13181318
BCLalways,
13191319
BCLn,
13201320
BCTRL8_LDinto_toc,
1321+
BCTRL_LWZinto_toc,
13211322
BCn,
13221323
CTRL_DEP
13231324
)>;

llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2441,10 +2441,6 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
24412441
}
24422442

24432443
unsigned PPCFrameLowering::getTOCSaveOffset() const {
2444-
if (Subtarget.isAIXABI())
2445-
// TOC save/restore is normally handled by the linker.
2446-
// Indirect calls should hit this limitation.
2447-
report_fatal_error("TOC save is not implemented on AIX yet.");
24482444
return TOCSaveOffset;
24492445
}
24502446

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3154,11 +3154,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
31543154

31553155
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
31563156
SelectionDAG &DAG) const {
3157+
if (Subtarget.isAIXABI())
3158+
report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3159+
31573160
return Op.getOperand(0);
31583161
}
31593162

31603163
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
31613164
SelectionDAG &DAG) const {
3165+
if (Subtarget.isAIXABI())
3166+
report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3167+
31623168
SDValue Chain = Op.getOperand(0);
31633169
SDValue Trmp = Op.getOperand(1); // trampoline
31643170
SDValue FPtr = Op.getOperand(2); // nested function
@@ -5209,34 +5215,48 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
52095215

52105216
MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
52115217

5218+
// Registers used in building the DAG.
5219+
const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5220+
const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5221+
5222+
// Offsets of descriptor members.
5223+
const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5224+
const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5225+
5226+
const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5227+
const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5228+
52125229
// One load for the functions entry point address.
5213-
SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5214-
/* Alignment = */ 8, MMOFlags);
5230+
SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5231+
Alignment, MMOFlags);
52155232

52165233
// One for loading the TOC anchor for the module that contains the called
52175234
// function.
5218-
SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5219-
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5235+
SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5236+
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
52205237
SDValue TOCPtr =
5221-
DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5222-
/* Alignment = */ 8, MMOFlags);
5238+
DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5239+
MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
52235240

52245241
// One for loading the environment pointer.
5225-
SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5226-
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5242+
SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5243+
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
52275244
SDValue LoadEnvPtr =
5228-
DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5229-
/* Alignment = */ 8, MMOFlags);
5245+
DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5246+
MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5247+
52305248

52315249
// Then copy the newly loaded TOC anchor to the TOC pointer.
5232-
SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, Glue);
5250+
SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
52335251
Chain = TOCVal.getValue(0);
52345252
Glue = TOCVal.getValue(1);
52355253

52365254
// If the function call has an explicit 'nest' parameter, it takes the
52375255
// place of the environment pointer.
5256+
assert((!hasNest || !Subtarget.isAIXABI()) &&
5257+
"Nest parameter is not supported on AIX.");
52385258
if (!hasNest) {
5239-
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, Glue);
5259+
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
52405260
Chain = EnvVal.getValue(0);
52415261
Glue = EnvVal.getValue(1);
52425262
}
@@ -5265,27 +5285,29 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
52655285
Ops.push_back(Callee);
52665286
else {
52675287
assert(!isPatchPoint && "Patch point call are not indirect.");
5268-
if (Subtarget.isAIXABI())
5269-
report_fatal_error("Indirect call on AIX is not implemented.");
5270-
5271-
// For 64-bit ELF we have saved the TOC pointer to the linkage area on the
5272-
// stack (this would have been done in `LowerCall_64SVR4`). The call
5273-
// instruction is a pseudo instruction that represents both the indirect
5274-
// branch and a load that restores the TOC pointer from the linkage area.
5275-
// The operand for the TOC restore is an add of the TOC save offset to the
5276-
// stack pointer. This must be the second operand: after the chain input but
5277-
// before any other variadic arguments.
5278-
if (Subtarget.is64BitELFABI()) {
5279-
SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5288+
5289+
// For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5290+
// on the stack (this would have been done in `LowerCall_64SVR4` or
5291+
// `LowerCall_AIX`). The call instruction is a pseudo instruction that
5292+
// represents both the indirect branch and a load that restores the TOC
5293+
// pointer from the linkage area. The operand for the TOC restore is an add
5294+
// of the TOC save offset to the stack pointer. This must be the second
5295+
// operand: after the chain input but before any other variadic arguments.
5296+
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
5297+
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5298+
5299+
SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
52805300
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
52815301
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5282-
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5302+
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
52835303
Ops.push_back(AddTOC);
52845304
}
52855305

52865306
// Add the register used for the environment pointer.
52875307
if (Subtarget.usesFunctionDescriptors() && !hasNest)
5288-
Ops.push_back(DAG.getRegister(PPC::X11, MVT::i64));
5308+
Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5309+
RegVT));
5310+
52895311

52905312
// Add CTR register as callee so a bctr can be emitted later.
52915313
if (isTailCall)
@@ -5306,7 +5328,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
53065328
// no way to mark dependencies as implicit here.
53075329
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
53085330
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
5309-
Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::X2 : PPC::R2, RegVT));
5331+
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
53105332

53115333
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
53125334
if (isVarArg && Subtarget.is32BitELFABI())
@@ -6962,9 +6984,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
69626984
if (isVarArg || isPatchPoint)
69636985
report_fatal_error("This call type is unimplemented on AIX.");
69646986

6965-
if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
6966-
report_fatal_error("Handling of indirect call is unimplemented!");
6967-
69686987
const PPCSubtarget& Subtarget =
69696988
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
69706989
if (Subtarget.hasQPX())
@@ -7023,6 +7042,26 @@ SDValue PPCTargetLowering::LowerCall_AIX(
70237042
"unimplemented!");
70247043
}
70257044

7045+
// For indirect calls, we need to save the TOC base to the stack for
7046+
// restoration after the call.
7047+
if (!isTailCall && !isPatchPoint &&
7048+
!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
7049+
const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7050+
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7051+
const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7052+
const unsigned TOCSaveOffset =
7053+
Subtarget.getFrameLowering()->getTOCSaveOffset();
7054+
7055+
setUsesTOCBasePtr(DAG);
7056+
SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7057+
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7058+
SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7059+
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7060+
Chain = DAG.getStore(
7061+
Val.getValue(1), dl, Val, AddPtr,
7062+
MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7063+
}
7064+
70267065
// Build a sequence of copy-to-reg nodes chained together with token chain
70277066
// and flag operands which copy the outgoing args into the appropriate regs.
70287067
SDValue InFlag;

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ namespace llvm {
174174
BCTRL,
175175

176176
/// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
177-
/// instruction and the TOC reload required on SVR4 PPC64.
177+
/// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
178+
/// and 64-bit AIX.
178179
BCTRL_LOAD_TOC,
179180

180181
/// Return with a flag operand, matched by 'blr'

llvm/lib/Target/PowerPC/PPCInstrFormats.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,29 @@ class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
15291529
let BH = 0;
15301530
}
15311531

1532+
class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
1533+
bits<5> bi, bit lk, bits<6> opcode2, dag OOL,
1534+
dag IOL, string asmstr, InstrItinClass itin,
1535+
list<dag> pattern>
1536+
: I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
1537+
1538+
bits<5> RST;
1539+
bits<21> D_RA;
1540+
1541+
let Pattern = pattern;
1542+
1543+
let Inst{6-10} = bo;
1544+
let Inst{11-15} = bi;
1545+
let Inst{16-18} = 0;
1546+
let Inst{19-20} = 0; // Unused (BH)
1547+
let Inst{21-30} = xo1;
1548+
let Inst{31} = lk;
1549+
1550+
let Inst{38-42} = RST;
1551+
let Inst{43-47} = D_RA{20-16}; // Base Register
1552+
let Inst{48-63} = D_RA{15-0}; // Displacement
1553+
}
1554+
15321555
// 1.7.8 XFX-Form
15331556
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
15341557
InstrItinClass itin>

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,6 +1645,15 @@ def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
16451645
"#TC_RETURNr $dst $offset",
16461646
[]>;
16471647

1648+
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
1649+
Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in {
1650+
def BCTRL_LWZinto_toc:
1651+
XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
1652+
(ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
1653+
[(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>;
1654+
1655+
}
1656+
16481657

16491658
let isCodeGenOnly = 1 in {
16501659

llvm/lib/Target/PowerPC/PPCSubtarget.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,34 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
358358
return isAIXABI() || (is64BitELFABI() && !isELFv2ABI());
359359
}
360360

361+
unsigned descriptorTOCAnchorOffset() const {
362+
assert(usesFunctionDescriptors() &&
363+
"Should only be called when the target uses descriptors.");
364+
return IsPPC64 ? 8 : 4;
365+
}
366+
367+
unsigned descriptorEnvironmentPointerOffset() const {
368+
assert(usesFunctionDescriptors() &&
369+
"Should only be called when the target uses descriptors.");
370+
return IsPPC64 ? 16 : 8;
371+
}
372+
373+
MCRegister getEnvironmentPointerRegister() const {
374+
assert(usesFunctionDescriptors() &&
375+
"Should only be called when the target uses descriptors.");
376+
return IsPPC64 ? PPC::X11 : PPC::R11;
377+
}
378+
379+
MCRegister getTOCPointerRegister() const {
380+
assert((is64BitELFABI() || isAIXABI()) &&
381+
"Should only be called when the target is a TOC based ABI.");
382+
return IsPPC64 ? PPC::X2 : PPC::R2;
383+
}
384+
385+
MCRegister getStackPointerRegister() const {
386+
return IsPPC64 ? PPC::X1 : PPC::R1;
387+
}
388+
361389
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
362390
};
363391
} // End llvm namespace

0 commit comments

Comments
 (0)