Skip to content

Commit 3282d87

Browse files
committed
[PowerPC][AIX] ByVal formal arguments in a single register.
Adds support for passing ByVal formal arguments as long as they fit in a single register. Differential Revision: https://reviews.llvm.org/D76401
1 parent 86e0a6c commit 3282d87

File tree

3 files changed

+374
-29
lines changed

3 files changed

+374
-29
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 89 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6861,9 +6861,14 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
68616861

68626862
const unsigned ByValSize = ArgFlags.getByValSize();
68636863

6864-
// An empty aggregate parameter takes up no storage and no registers.
6865-
if (ByValSize == 0)
6864+
// An empty aggregate parameter takes up no storage and no registers,
6865+
// but needs a MemLoc for a stack slot for the formal arguments side.
6866+
if (ByValSize == 0) {
6867+
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6868+
State.getNextStackOffset(), RegVT,
6869+
LocInfo));
68666870
return false;
6871+
}
68676872

68686873
if (ByValSize <= PtrByteSize) {
68696874
State.AllocateStack(PtrByteSize, PtrByteSize);
@@ -6978,6 +6983,24 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
69786983
return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
69796984
}
69806985

6986+
/// Computes the offset associated with the general purpose argument register
/// \p Reg: the linkage size reported by \p FL plus the register's index
/// (counted from R3 or X3) scaled by 4 bytes for a GPRC register or 8 bytes
/// for a G8RC register. Asserts that \p Reg lies in R3..R10 or X3..X10, and
/// hits llvm_unreachable for a register in neither class.
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6987+
const unsigned LASize = FL->getLinkageSize();
6988+
6989+
// GPRC registers: each register after R3 advances the offset by 4 bytes.
if (PPC::GPRCRegClass.contains(Reg)) {
6990+
assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6991+
"Reg must be a valid argument register!");
6992+
return LASize + 4 * (Reg - PPC::R3);
6993+
}
6994+
6995+
// G8RC registers: each register after X3 advances the offset by 8 bytes.
if (PPC::G8RCRegClass.contains(Reg)) {
6996+
assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6997+
"Reg must be a valid argument register!");
6998+
return LASize + 8 * (Reg - PPC::X3);
6999+
}
7000+
7001+
// Any register outside both classes is a caller error.
llvm_unreachable("Only general purpose registers expected.");
7002+
}
7003+
69817004
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
69827005
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
69837006
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -7015,12 +7038,12 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
70157038
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
70167039
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
70177040

7041+
SmallVector<SDValue, 8> MemOps;
7042+
70187043
for (CCValAssign &VA : ArgLocs) {
70197044
EVT ValVT = VA.getValVT();
70207045
MVT LocVT = VA.getLocVT();
70217046
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7022-
assert(!Flags.isByVal() &&
7023-
"Passing structure by value is unimplemented for formal arguments.");
70247047
assert((VA.isRegLoc() || VA.isMemLoc()) &&
70257048
"Unexpected location for function call argument.");
70267049

@@ -7033,6 +7056,59 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
70337056
if (VA.isMemLoc() && VA.needsCustom())
70347057
continue;
70357058

7059+
if (Flags.isByVal() && VA.isMemLoc()) {
7060+
if (Flags.getByValSize() != 0)
7061+
report_fatal_error(
7062+
"ByVal arguments passed on stack not implemented yet");
7063+
7064+
const int FI = MF.getFrameInfo().CreateFixedObject(
7065+
PtrByteSize, VA.getLocMemOffset(), /* IsImmutable */ false,
7066+
/* IsAliased */ true);
7067+
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7068+
InVals.push_back(FIN);
7069+
7070+
continue;
7071+
}
7072+
7073+
if (Flags.isByVal()) {
7074+
assert(VA.isRegLoc() && "MemLocs should already be handled.");
7075+
7076+
const unsigned ByValSize = Flags.getByValSize();
7077+
if (ByValSize > PtrByteSize)
7078+
report_fatal_error("Formal arguments greater then register size not "
7079+
"implemented yet.");
7080+
7081+
const MCPhysReg ArgReg = VA.getLocReg();
7082+
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7083+
const unsigned Offset = mapArgRegToOffsetAIX(ArgReg, FL);
7084+
7085+
const unsigned StackSize = alignTo(ByValSize, PtrByteSize);
7086+
const int FI = MF.getFrameInfo().CreateFixedObject(
7087+
StackSize, Offset, /* IsImmutable */ false, /* IsAliased */ true);
7088+
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7089+
7090+
InVals.push_back(FIN);
7091+
7092+
const unsigned VReg = MF.addLiveIn(ArgReg, IsPPC64 ? &PPC::G8RCRegClass
7093+
: &PPC::GPRCRegClass);
7094+
7095+
// Since the caller's side has left-justified the aggregate in the
7096+
// register, we can simply store the entire register into the stack
7097+
// slot.
7098+
// The store to the fixedstack object is needed because accessing a
7099+
// field of the ByVal will use a gep and load. Ideally we will optimize
7100+
// to extracting the value from the register directly, and elide the
7101+
// stores when the argument's address is not taken, but that will need to
7102+
// be future work.
7103+
SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7104+
SDValue Store =
7105+
DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom, FIN,
7106+
MachinePointerInfo::getFixedStack(MF, FI, 0));
7107+
7108+
MemOps.push_back(Store);
7109+
continue;
7110+
}
7111+
70367112
if (VA.isRegLoc()) {
70377113
MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
70387114
unsigned VReg =
@@ -7080,6 +7156,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
70807156
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
70817157
FuncInfo->setMinReservedArea(CallerReservedArea);
70827158

7159+
if (!MemOps.empty())
7160+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7161+
70837162
return Chain;
70847163
}
70857164

@@ -7156,8 +7235,13 @@ SDValue PPCTargetLowering::LowerCall_AIX(
71567235

71577236
if (Flags.isByVal()) {
71587237
const unsigned ByValSize = Flags.getByValSize();
7238+
7239+
// Nothing to do for zero-sized ByVals on the caller side.
7240+
if (!ByValSize)
7241+
continue;
7242+
71597243
assert(
7160-
VA.isRegLoc() && ByValSize > 0 && ByValSize <= PtrByteSize &&
7244+
VA.isRegLoc() && ByValSize <= PtrByteSize &&
71617245
"Pass-by-value arguments are only supported in a single register.");
71627246

71637247
// Loads must be a power-of-2 size and cannot be larger than the

llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

Lines changed: 184 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@
2222
define void @call_test_byval_1Byte() {
2323
entry:
2424
%s0 = alloca %struct.S0, align 8
25-
call void @test_byval_1Byte(%struct.S0* byval(%struct.S0) align 1 %s0, %struct.S1* byval(%struct.S1) align 1 @gS1)
25+
%call = call zeroext i8 @test_byval_1Byte(%struct.S0* byval(%struct.S0) align 1 %s0, %struct.S1* byval(%struct.S1) align 1 @gS1)
2626
ret void
2727
}
2828

29-
declare void @test_byval_1Byte(%struct.S0* byval(%struct.S0) align 1, %struct.S1* byval(%struct.S1) align 1)
3029

3130
; CHECK-LABEL: name: call_test_byval_1Byte{{.*}}
3231

@@ -63,18 +62,59 @@ declare void @test_byval_1Byte(%struct.S0* byval(%struct.S0) align 1, %struct.S1
6362
; ASM64-NEXT: nop
6463
; ASM64-NEXT: addi 1, 1, 128
6564

65+
66+
define zeroext i8 @test_byval_1Byte(%struct.S0* byval(%struct.S0) align 1 %s0, %struct.S1* byval(%struct.S1) align 1 %s) {
67+
entry:
68+
%arrayidx = getelementptr inbounds %struct.S1, %struct.S1* %s, i32 0, i32 0, i32 0
69+
%0 = load i8, i8* %arrayidx, align 1
70+
ret i8 %0
71+
}
72+
73+
; CHECK-LABEL: name: test_byval_1Byte
74+
75+
; 32BIT: fixedStack:
76+
; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 4, alignment: 8, stack-id: default,
77+
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
78+
; 32BIT: - { id: 1, type: default, offset: 24, size: 4, alignment: 8, stack-id: default,
79+
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
80+
81+
; 32BIT: bb.0.entry:
82+
; 32BIT-NEXT: liveins: $r3
83+
; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 8)
84+
; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (dereferenceable load 1
85+
; 32BIT-NEXT: BLR
86+
87+
; 64BIT: fixedStack:
88+
; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, stack-id: default,
89+
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
90+
; 64BIT: - { id: 1, type: default, offset: 48, size: 8, alignment: 16, stack-id: default,
91+
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
92+
93+
; 64BIT: bb.0.entry:
94+
; 64BIT-NEXT: liveins: $x3
95+
; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16)
96+
; 64BIT-NEXT: renamable $x3 = LBZ8 0, %fixed-stack.0 :: (dereferenceable load 1
97+
98+
; CHECKASM-LABEL: .test_byval_1Byte:
99+
100+
; ASM32: stw 3, 24(1)
101+
; ASM32-NEXT: lbz 3, 24(1)
102+
; ASM32-NEXT: blr
103+
104+
; ASM64: std 3, 48(1)
105+
; ASM64-NEXT: lbz 3, 48(1)
106+
; ASM64-NEXT: blr
107+
66108
%struct.S2 = type { [2 x i8] }
67109

68110
@gS2 = external global %struct.S2, align 1
69111

70112
define void @call_test_byval_2Byte() {
71113
entry:
72-
call void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2)
114+
%call = call zeroext i8 @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2)
73115
ret void
74116
}
75117

76-
declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1)
77-
78118
; CHECK-LABEL: name: call_test_byval_2Byte{{.*}}
79119

80120
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
@@ -110,18 +150,51 @@ declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1)
110150
; ASM64-NEXT: nop
111151
; ASM64-NEXT: addi 1, 1, 112
112152

113-
%struct.S3 = type <{ i8, i16 }>
114153

154+
define zeroext i8 @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 %s) {
155+
entry:
156+
%arrayidx = getelementptr inbounds %struct.S2, %struct.S2* %s, i32 0, i32 0, i32 1
157+
%0 = load i8, i8* %arrayidx, align 1
158+
ret i8 %0
159+
}
160+
161+
; CHECK-LABEL: name: test_byval_2Byte
162+
; 32BIT: fixedStack:
163+
; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 4, alignment: 8, stack-id: default,
164+
165+
; 32BIT: bb.0.entry:
166+
; 32BIT-NEXT: liveins: $r3
167+
; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 8)
168+
; 32BIT-NEXT: renamable $r3 = LBZ 1, %fixed-stack.0 :: (dereferenceable load 1
169+
170+
; 64BIT: fixedStack:
171+
; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, stack-id: default,
172+
173+
; 64BIT: bb.0.entry:
174+
; 64BIT-NEXT: liveins: $x3
175+
; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16)
176+
; 64BIT-NEXT: renamable $x3 = LBZ8 1, %fixed-stack.0 :: (dereferenceable load 1
177+
178+
; CHECKASM-LABEL: .test_byval_2Byte:
179+
180+
; ASM32: stw 3, 24(1)
181+
; ASM32-NEXT: lbz 3, 25(1)
182+
; ASM32-NEXT: blr
183+
184+
; ASM64: std 3, 48(1)
185+
; ASM64-NEXT: lbz 3, 49(1)
186+
; ASM64-NEXT: blr
187+
188+
189+
%struct.S3 = type <{ i8, i16 }>
115190
@gS3 = external global %struct.S3, align 1
116191

117192
define void @call_test_byval_3Byte() {
118193
entry:
119-
call void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3)
194+
%call = call zeroext i16 @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3)
120195
ret void
121196
}
122197

123-
declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1)
124-
125198
; CHECK-LABEL: name: call_test_byval_3Byte{{.*}}
126199

127200
; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
@@ -166,6 +239,44 @@ declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1)
166239
; ASM64-NEXT: bl .test_byval_3Byte
167240
; ASM64-NEXT: nop
168241

242+
243+
define zeroext i16 @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 %s) {
244+
entry:
245+
%gep = getelementptr inbounds %struct.S3, %struct.S3* %s, i32 0, i32 1
246+
%0 = load i16, i16* %gep, align 1
247+
ret i16 %0
248+
}
249+
250+
; CHECK-LABEL: name: test_byval_3Byte
251+
252+
; 32BIT: fixedStack:
253+
; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 4, alignment: 8, stack-id: default,
254+
255+
; 32BIT: bb.0.entry:
256+
; 32BIT-NEXT: liveins: $r3
257+
; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 8)
258+
; 32BIT-NEXT: renamable $r3 = LHZ 1, %fixed-stack.0 :: (dereferenceable load 2
259+
260+
; 64BIT: fixedStack:
261+
; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, stack-id: default,
262+
263+
; 64BIT: bb.0.entry:
264+
; 64BIT-NEXT: liveins: $x3
265+
; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16)
266+
; 64BIT-NEXT: renamable $x3 = LHZ8 1, %fixed-stack.0 :: (dereferenceable load 2
267+
268+
269+
; CHECKASM-LABEL: .test_byval_3Byte:
270+
271+
; ASM32: stw 3, 24(1)
272+
; ASM32-NEXT: lhz 3, 25(1)
273+
; ASM32-NEXT: blr
274+
275+
; ASM64: std 3, 48(1)
276+
; ASM64-NEXT: lhz 3, 49(1)
277+
; ASM64-NEXT: blr
278+
279+
169280
%struct.S4 = type { [4 x i8] }
170281
%struct.S4A = type { i32 }
171282

@@ -175,12 +286,10 @@ define void @call_test_byval_4Byte() {
175286
entry:
176287
%s0 = alloca %struct.S0, align 8
177288
%s4a = alloca %struct.S4A, align 4
178-
call void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4, %struct.S0* byval(%struct.S0) align 1 %s0, %struct.S4A* byval(%struct.S4A) align 4 %s4a)
289+
%call = call signext i32 @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4, %struct.S0* byval(%struct.S0) align 1 %s0, %struct.S4A* byval(%struct.S4A) align 4 %s4a)
179290
ret void
180291
}
181292

182-
declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1, %struct.S0* byval(%struct.S0) align 1, %struct.S4A* byval(%struct.S4A) align 4)
183-
184293
; CHECK-LABEL: name: call_test_byval_4Byte{{.*}}
185294

186295
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
@@ -219,3 +328,66 @@ declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1, %struct.S0
219328
; ASM64-NEXT: nop
220329
; ASM64-NEXT: addi 1, 1, 128
221330

331+
332+
define signext i32 @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 %s, %struct.S0* byval(%struct.S0) align 1, %struct.S4A* byval(%struct.S4A) align 4 %s4a) {
333+
entry:
334+
%arrayidx = getelementptr inbounds %struct.S4, %struct.S4* %s, i32 0, i32 0, i32 3
335+
%gep = getelementptr inbounds %struct.S4A, %struct.S4A* %s4a, i32 0, i32 0
336+
%1 = load i8, i8* %arrayidx, align 1
337+
%2 = load i32, i32* %gep, align 4
338+
%conv = zext i8 %1 to i32
339+
%add = add nsw i32 %2, %conv
340+
ret i32 %add
341+
}
342+
343+
; CHECK-LABEL: name: test_byval_4Byte
344+
345+
; 32BIT: fixedStack:
346+
; 32BIT-NEXT: - { id: 0, type: default, offset: 28, size: 4, alignment: 4, stack-id: default,
347+
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
348+
; 32BIT: - { id: 1, type: default, offset: 28, size: 4, alignment: 4, stack-id: default,
349+
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
350+
; 32BIT: - { id: 2, type: default, offset: 24, size: 4, alignment: 8, stack-id: default,
351+
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
352+
353+
; 32BIT: bb.0.entry:
354+
; 32BIT-NEXT: liveins: $r3
355+
; 32BIT: STW renamable $r3, 0, %fixed-stack.2 :: (store 4 into %fixed-stack.2, align 8)
356+
; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0)
357+
; 32BIT-DAG: renamable $r[[SCRATCH:[0-9]+]] = RLWINM killed renamable $r3, 0, 24, 31
358+
; 32BIT-DAG: renamable $r3 = nsw ADD4 renamable $r4, killed renamable $r[[SCRATCH]]
359+
; 32BIT: BLR
360+
361+
; 64BIT: fixedStack:
362+
; 64BIT-NEXT: - { id: 0, type: default, offset: 56, size: 8, alignment: 8, stack-id: default,
363+
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
364+
; 64BIT: - { id: 1, type: default, offset: 56, size: 8, alignment: 8, stack-id: default,
365+
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
366+
; 64BIT: - { id: 2, type: default, offset: 48, size: 8, alignment: 16, stack-id: default,
367+
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
368+
369+
; 64BIT: bb.0.entry:
370+
; 64BIT-NEXT: liveins: $x3
371+
; 64BIT: STD killed renamable $x3, 0, %fixed-stack.2 :: (store 8 into %fixed-stack.2, align 16)
372+
; 64BIT-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
373+
; 64BIT-DAG: renamable $r[[SCRATCH1:[0-9]+]] = LBZ 3, %fixed-stack.2 :: (dereferenceable load 1
374+
; 64BIT-DAG: renamable $r[[SCRATCH2:[0-9]+]] = LWZ 0, %fixed-stack.0 :: (dereferenceable load 4
375+
; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]]
376+
; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r[[SCRATCH3]]
377+
; 64BIT-NEXT: BLR8
378+
379+
; CHECKASM-LABEL: .test_byval_4Byte:
380+
381+
; ASM32: stw 3, 24(1)
382+
; ASM32-DAG: stw 4, 28(1)
383+
; ASM32-DAG: clrlwi [[SCRATCH:[0-9]+]], 3, 24
384+
; ASM32-DAG: add 3, 4, [[SCRATCH]]
385+
; ASM32-NEXT: blr
386+
387+
; ASM64: std 3, 48(1)
388+
; ASM64-NEXT: std 4, 56(1)
389+
; ASM64-DAG: lbz [[SCRATCH1:[0-9]+]], 51(1)
390+
; ASM64-DAG: lwz [[SCRATCH2:[0-9]+]], 56(1)
391+
; ASM64-NEXT: add [[SCRATCH3:[0-9]+]], [[SCRATCH2]], [[SCRATCH1]]
392+
; ASM64-NEXT: extsw 3, [[SCRATCH3]]
393+
; ASM64-NEXT: blr

0 commit comments

Comments
 (0)