Skip to content

Commit 4ea1baf

Browse files
committed
AMDGPU: Initial, crude support for indirect calls
This isn't really usable, and requires using the -amdgpu-fixed-function-abi flag to work. Assumes a uniform call target, and will hit a verifier error if the call target ends up in a VGPR. Also doesn't attempt to do anything sensible for the reported register/stack usage.
1 parent ea4597e commit 4ea1baf

File tree

5 files changed

+247
-27
lines changed

5 files changed

+247
-27
lines changed

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,14 @@ struct AMDGPUFunctionArgInfo {
159159

160160
class AMDGPUArgumentUsageInfo : public ImmutablePass {
161161
private:
162-
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
163-
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
164162
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
165163

166164
public:
167165
static char ID;
168166

167+
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
168+
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
169+
169170
AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }
170171

171172
void getAnalysisUsage(AnalysisUsage &AU) const override {

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,15 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
601601
return std::max(NumVGPR, NumAGPR);
602602
}
603603

604+
static const Function *getCalleeFunction(const MachineOperand &Op) {
605+
if (Op.isImm()) {
606+
assert(Op.getImm() == 0);
607+
return nullptr;
608+
}
609+
610+
return cast<Function>(Op.getGlobal());
611+
}
612+
604613
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
605614
const MachineFunction &MF) const {
606615
SIFunctionResourceInfo Info;
@@ -853,8 +862,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
853862

854863
const MachineOperand *CalleeOp
855864
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
856-
const Function *Callee = cast<Function>(CalleeOp->getGlobal());
857-
if (Callee->isDeclaration()) {
865+
866+
const Function *Callee = getCalleeFunction(*CalleeOp);
867+
if (!Callee || Callee->isDeclaration()) {
858868
// If this is a call to an external function, we can't do much. Make
859869
// conservative guesses.
860870

@@ -897,7 +907,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
897907
Info.HasRecursion |= I->second.HasRecursion;
898908
}
899909

900-
if (!Callee->doesNotRecurse())
910+
// FIXME: Call site could have norecurse on it
911+
if (!Callee || !Callee->doesNotRecurse())
901912
Info.HasRecursion = true;
902913
}
903914
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,21 +2445,20 @@ void SITargetLowering::passSpecialInputs(
24452445
if (!CLI.CS)
24462446
return;
24472447

2448-
const Function *CalleeFunc = CLI.CS.getCalledFunction();
2449-
assert(CalleeFunc);
2450-
24512448
SelectionDAG &DAG = CLI.DAG;
24522449
const SDLoc &DL = CLI.DL;
24532450

24542451
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
2455-
2456-
auto &ArgUsageInfo =
2457-
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
2458-
const AMDGPUFunctionArgInfo &CalleeArgInfo
2459-
= ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
2460-
24612452
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
24622453

2454+
const AMDGPUFunctionArgInfo *CalleeArgInfo
2455+
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
2456+
if (const Function *CalleeFunc = CLI.CS.getCalledFunction()) {
2457+
auto &ArgUsageInfo =
2458+
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
2459+
CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
2460+
}
2461+
24632462
// TODO: Unify with private memory register handling. This is complicated by
24642463
// the fact that at least in kernels, the input argument is not necessarily
24652464
// in the same ___location as the input.
@@ -2477,7 +2476,7 @@ void SITargetLowering::passSpecialInputs(
24772476
const ArgDescriptor *OutgoingArg;
24782477
const TargetRegisterClass *ArgRC;
24792478

2480-
std::tie(OutgoingArg, ArgRC) = CalleeArgInfo.getPreloadedValue(InputID);
2479+
std::tie(OutgoingArg, ArgRC) = CalleeArgInfo->getPreloadedValue(InputID);
24812480
if (!OutgoingArg)
24822481
continue;
24832482

@@ -2518,13 +2517,13 @@ void SITargetLowering::passSpecialInputs(
25182517
const TargetRegisterClass *ArgRC;
25192518

25202519
std::tie(OutgoingArg, ArgRC) =
2521-
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
2520+
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
25222521
if (!OutgoingArg)
25232522
std::tie(OutgoingArg, ArgRC) =
2524-
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
2523+
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
25252524
if (!OutgoingArg)
25262525
std::tie(OutgoingArg, ArgRC) =
2527-
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
2526+
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
25282527
if (!OutgoingArg)
25292528
return;
25302529

@@ -2539,18 +2538,18 @@ void SITargetLowering::passSpecialInputs(
25392538
SDLoc SL;
25402539

25412540
// If incoming ids are not packed we need to pack them.
2542-
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX)
2541+
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
25432542
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
25442543

2545-
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY) {
2544+
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
25462545
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
25472546
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
25482547
DAG.getShiftAmountConstant(10, MVT::i32, SL));
25492548
InputReg = InputReg.getNode() ?
25502549
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
25512550
}
25522551

2553-
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ) {
2552+
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
25542553
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
25552554
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
25562555
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2708,7 +2707,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
27082707
if (!CLI.CS.getInstruction())
27092708
report_fatal_error("unsupported libcall legalization");
27102709

2711-
if (!CLI.CS.getCalledFunction()) {
2710+
if (!AMDGPUTargetMachine::EnableFixedFunctionABI && !CLI.CS.getCalledFunction()) {
27122711
return lowerUnhandledCall(CLI, InVals,
27132712
"unsupported indirect call to function ");
27142713
}
@@ -2937,9 +2936,12 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
29372936
Ops.push_back(Callee);
29382937
// Add a redundant copy of the callee global which will not be legalized, as
29392938
// we need direct access to the callee later.
2940-
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Callee);
2941-
const GlobalValue *GV = GSD->getGlobal();
2942-
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
2939+
if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
2940+
const GlobalValue *GV = GSD->getGlobal();
2941+
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
2942+
} else {
2943+
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2944+
}
29432945

29442946
if (IsTailCall) {
29452947
// Each tail call may have to adjust the stack by a different amount, so

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -445,8 +445,8 @@ def SI_RETURN : SPseudoInstSI <
445445

446446
// Return for returning function calls without output register.
447447
//
448-
// This version is only needed so we can fill in the output regiter in
449-
// the custom inserter.
448+
// This version is only needed so we can fill in the output register
449+
// in the custom inserter.
450450
def SI_CALL_ISEL : SPseudoInstSI <
451451
(outs), (ins SSrc_b64:$src0, unknown:$callee),
452452
[(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
@@ -458,6 +458,11 @@ def SI_CALL_ISEL : SPseudoInstSI <
458458
let isConvergent = 1;
459459
}
460460

461+
def : GCNPat<
462+
(AMDGPUcall i64:$src0, (i64 0)),
463+
(SI_CALL_ISEL $src0, (i64 0))
464+
>;
465+
461466
// Wrapper around s_swappc_b64 with extra $callee parameter to track
462467
// the called function after regalloc.
463468
def SI_CALL : SPseudoInstSI <
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3+
4+
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
5+
@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
6+
7+
define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
8+
; GCN-LABEL: test_indirect_call_sgpr_ptr:
9+
; GCN: .amd_kernel_code_t
10+
; GCN-NEXT: amd_code_version_major = 1
11+
; GCN-NEXT: amd_code_version_minor = 2
12+
; GCN-NEXT: amd_machine_kind = 1
13+
; GCN-NEXT: amd_machine_version_major = 7
14+
; GCN-NEXT: amd_machine_version_minor = 0
15+
; GCN-NEXT: amd_machine_version_stepping = 0
16+
; GCN-NEXT: kernel_code_entry_byte_offset = 256
17+
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
18+
; GCN-NEXT: granulated_workitem_vgpr_count = 7
19+
; GCN-NEXT: granulated_wavefront_sgpr_count = 5
20+
; GCN-NEXT: priority = 0
21+
; GCN-NEXT: float_mode = 192
22+
; GCN-NEXT: priv = 0
23+
; GCN-NEXT: enable_dx10_clamp = 1
24+
; GCN-NEXT: debug_mode = 0
25+
; GCN-NEXT: enable_ieee_mode = 1
26+
; GCN-NEXT: enable_wgp_mode = 0
27+
; GCN-NEXT: enable_mem_ordered = 0
28+
; GCN-NEXT: enable_fwd_progress = 0
29+
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
30+
; GCN-NEXT: user_sgpr_count = 14
31+
; GCN-NEXT: enable_trap_handler = 0
32+
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
33+
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
34+
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
35+
; GCN-NEXT: enable_sgpr_workgroup_info = 0
36+
; GCN-NEXT: enable_vgpr_workitem_id = 2
37+
; GCN-NEXT: enable_exception_msb = 0
38+
; GCN-NEXT: granulated_lds_size = 0
39+
; GCN-NEXT: enable_exception = 0
40+
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
41+
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
42+
; GCN-NEXT: enable_sgpr_queue_ptr = 1
43+
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
44+
; GCN-NEXT: enable_sgpr_dispatch_id = 1
45+
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
46+
; GCN-NEXT: enable_sgpr_private_segment_size = 0
47+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
48+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
49+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
50+
; GCN-NEXT: enable_wavefront_size32 = 0
51+
; GCN-NEXT: enable_ordered_append_gds = 0
52+
; GCN-NEXT: private_element_size = 1
53+
; GCN-NEXT: is_ptr64 = 1
54+
; GCN-NEXT: is_dynamic_callstack = 1
55+
; GCN-NEXT: is_debug_enabled = 0
56+
; GCN-NEXT: is_xnack_enabled = 1
57+
; GCN-NEXT: workitem_private_segment_byte_size = 16384
58+
; GCN-NEXT: workgroup_group_segment_byte_size = 0
59+
; GCN-NEXT: gds_segment_byte_size = 0
60+
; GCN-NEXT: kernarg_segment_byte_size = 0
61+
; GCN-NEXT: workgroup_fbarrier_count = 0
62+
; GCN-NEXT: wavefront_sgpr_count = 48
63+
; GCN-NEXT: workitem_vgpr_count = 32
64+
; GCN-NEXT: reserved_vgpr_first = 0
65+
; GCN-NEXT: reserved_vgpr_count = 0
66+
; GCN-NEXT: reserved_sgpr_first = 0
67+
; GCN-NEXT: reserved_sgpr_count = 0
68+
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
69+
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
70+
; GCN-NEXT: kernarg_segment_alignment = 4
71+
; GCN-NEXT: group_segment_alignment = 4
72+
; GCN-NEXT: private_segment_alignment = 4
73+
; GCN-NEXT: wavefront_size = 6
74+
; GCN-NEXT: call_convention = -1
75+
; GCN-NEXT: runtime_loader_kernel_symbol = 0
76+
; GCN-NEXT: .end_amd_kernel_code_t
77+
; GCN-NEXT: ; %bb.0:
78+
; GCN-NEXT: s_mov_b32 s33, s17
79+
; GCN-NEXT: s_mov_b32 s32, s33
80+
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
81+
; GCN-NEXT: s_add_u32 s12, s12, s33
82+
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
83+
; GCN-NEXT: s_getpc_b64 s[4:5]
84+
; GCN-NEXT: s_add_u32 s4, s4, gv.fptr0@rel32@lo+4
85+
; GCN-NEXT: s_addc_u32 s5, s5, gv.fptr0@rel32@hi+4
86+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
87+
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
88+
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
89+
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
90+
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
91+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
92+
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
93+
; GCN-NEXT: s_endpgm
94+
%fptr = load void()*, void()* addrspace(4)* @gv.fptr0
95+
call void %fptr()
96+
ret void
97+
}
98+
99+
define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
100+
; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
101+
; GCN: .amd_kernel_code_t
102+
; GCN-NEXT: amd_code_version_major = 1
103+
; GCN-NEXT: amd_code_version_minor = 2
104+
; GCN-NEXT: amd_machine_kind = 1
105+
; GCN-NEXT: amd_machine_version_major = 7
106+
; GCN-NEXT: amd_machine_version_minor = 0
107+
; GCN-NEXT: amd_machine_version_stepping = 0
108+
; GCN-NEXT: kernel_code_entry_byte_offset = 256
109+
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
110+
; GCN-NEXT: granulated_workitem_vgpr_count = 7
111+
; GCN-NEXT: granulated_wavefront_sgpr_count = 5
112+
; GCN-NEXT: priority = 0
113+
; GCN-NEXT: float_mode = 192
114+
; GCN-NEXT: priv = 0
115+
; GCN-NEXT: enable_dx10_clamp = 1
116+
; GCN-NEXT: debug_mode = 0
117+
; GCN-NEXT: enable_ieee_mode = 1
118+
; GCN-NEXT: enable_wgp_mode = 0
119+
; GCN-NEXT: enable_mem_ordered = 0
120+
; GCN-NEXT: enable_fwd_progress = 0
121+
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
122+
; GCN-NEXT: user_sgpr_count = 14
123+
; GCN-NEXT: enable_trap_handler = 0
124+
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
125+
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
126+
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
127+
; GCN-NEXT: enable_sgpr_workgroup_info = 0
128+
; GCN-NEXT: enable_vgpr_workitem_id = 2
129+
; GCN-NEXT: enable_exception_msb = 0
130+
; GCN-NEXT: granulated_lds_size = 0
131+
; GCN-NEXT: enable_exception = 0
132+
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
133+
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
134+
; GCN-NEXT: enable_sgpr_queue_ptr = 1
135+
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
136+
; GCN-NEXT: enable_sgpr_dispatch_id = 1
137+
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
138+
; GCN-NEXT: enable_sgpr_private_segment_size = 0
139+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
140+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
141+
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
142+
; GCN-NEXT: enable_wavefront_size32 = 0
143+
; GCN-NEXT: enable_ordered_append_gds = 0
144+
; GCN-NEXT: private_element_size = 1
145+
; GCN-NEXT: is_ptr64 = 1
146+
; GCN-NEXT: is_dynamic_callstack = 1
147+
; GCN-NEXT: is_debug_enabled = 0
148+
; GCN-NEXT: is_xnack_enabled = 1
149+
; GCN-NEXT: workitem_private_segment_byte_size = 16384
150+
; GCN-NEXT: workgroup_group_segment_byte_size = 0
151+
; GCN-NEXT: gds_segment_byte_size = 0
152+
; GCN-NEXT: kernarg_segment_byte_size = 0
153+
; GCN-NEXT: workgroup_fbarrier_count = 0
154+
; GCN-NEXT: wavefront_sgpr_count = 48
155+
; GCN-NEXT: workitem_vgpr_count = 32
156+
; GCN-NEXT: reserved_vgpr_first = 0
157+
; GCN-NEXT: reserved_vgpr_count = 0
158+
; GCN-NEXT: reserved_sgpr_first = 0
159+
; GCN-NEXT: reserved_sgpr_count = 0
160+
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
161+
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
162+
; GCN-NEXT: kernarg_segment_alignment = 4
163+
; GCN-NEXT: group_segment_alignment = 4
164+
; GCN-NEXT: private_segment_alignment = 4
165+
; GCN-NEXT: wavefront_size = 6
166+
; GCN-NEXT: call_convention = -1
167+
; GCN-NEXT: runtime_loader_kernel_symbol = 0
168+
; GCN-NEXT: .end_amd_kernel_code_t
169+
; GCN-NEXT: ; %bb.0:
170+
; GCN-NEXT: s_mov_b32 s33, s17
171+
; GCN-NEXT: s_mov_b32 s32, s33
172+
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
173+
; GCN-NEXT: s_add_u32 s12, s12, s33
174+
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
175+
; GCN-NEXT: s_getpc_b64 s[4:5]
176+
; GCN-NEXT: s_add_u32 s4, s4, gv.fptr1@rel32@lo+4
177+
; GCN-NEXT: s_addc_u32 s5, s5, gv.fptr1@rel32@hi+4
178+
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
179+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
180+
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
181+
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
182+
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
183+
; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
184+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
185+
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
186+
; GCN-NEXT: s_endpgm
187+
%fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
188+
call void %fptr(i32 123)
189+
ret void
190+
}
191+
192+
; FIXME
193+
; define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
194+
; call void %fptr()
195+
; ret void
196+
; }
197+
198+
; define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
199+
; call void %fptr(i32 123)
200+
; ret void
201+
; }

0 commit comments

Comments (0)