Skip to content

Commit 630f67f

Browse files
committed
add tgt_target_kernel helper and test update
1 parent 692facf commit 630f67f

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,25 @@ pub(crate) fn handle_gpu_code<'ll>(
2727
}
2828
}
2929
gen_call_handling(&cx, &kernels, &o_types);
30+
generate_launcher(&cx);
3031
crate::builder::gpu_wrapper::gen_image_wrapper_module(&cgcx);
3132
}
3233

34+
// ; Function Attrs: nounwind
35+
// declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #2
36+
/// Declares the `__tgt_target_kernel` function through `cx` and returns the declaration.
///
/// The function type built here is `i32 (ptr, i64, i32, i32, ptr, ptr)`. After declaring it,
/// the `NoUnwind` attribute is applied to the declaration at function level.
fn generate_launcher<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value {
    let tptr = cx.type_ptr();
    let ti64 = cx.type_i64();
    let ti32 = cx.type_i32();
    // The parameter list is fixed, so borrow an array literal instead of allocating a `Vec`.
    let tgt_fn_ty = cx.type_func(&[tptr, ti64, ti32, ti32, tptr, tptr], ti32);
    // `cx` is already a reference; hand it on directly rather than borrowing it again.
    let tgt_decl = declare_offload_fn(cx, "__tgt_target_kernel", tgt_fn_ty);
    let nounwind = llvm::AttributeKind::NoUnwind.create_attr(cx.llcx);
    attributes::apply_to_llfn(tgt_decl, Function, &[nounwind]);
    tgt_decl
}
48+
3349
// What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper:
3450
// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
3551
// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@@ -83,7 +99,7 @@ pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Ty
8399
offload_entry_ty
84100
}
85101

86-
fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
102+
fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type {
87103
let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments");
88104
let tptr = cx.type_ptr();
89105
let ti64 = cx.type_i64();
@@ -118,9 +134,10 @@ fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
118134
vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32];
119135

120136
cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false);
137+
kernel_arguments_ty
121138
// We don't handle kernels yet, so for now we just add a global dummy
122139
// to make sure that the __tgt_offload_entry is defined and handled correctly.
123-
cx.declare_global("my_struct_global2", kernel_arguments_ty);
140+
//cx.declare_global("my_struct_global2", kernel_arguments_ty);
124141
}
125142

126143
fn gen_tgt_data_mappers<'ll>(
@@ -295,7 +312,7 @@ fn gen_call_handling<'ll>(
295312
let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
296313
cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false);
297314

298-
gen_tgt_kernel_global(&cx);
315+
let tgt_kernel_decl = gen_tgt_kernel_global(&cx);
299316
let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx);
300317

301318
let main_fn = cx.get_function("main");
@@ -329,6 +346,9 @@ fn gen_call_handling<'ll>(
329346
// These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
330347
let ty2 = cx.type_array(cx.type_i64(), num_args);
331348
let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");
349+
350+
let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
351+
//%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
332352
// Now we allocate once per function param, a copy to be passed to one of our maps.
333353
let mut vals = vec![];
334354
let mut geps = vec![];

tests/codegen/gpu_offload/gpu_host.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,15 @@ fn main() {
2121
}
2222

2323
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
24-
// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
2524
// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
2625
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
26+
// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
2727

2828
// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 1024]
2929
// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 3]
3030
// CHECK: @.kernel_1.region_id = weak unnamed_addr constant i8 0
3131
// CHECK: @.offloading.entry_name.1 = internal unnamed_addr constant [9 x i8] c"kernel_1\00", section ".llvm.rodata.offloading", align 1
3232
// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section ".omp_offloading_entries", align 1
33-
// CHECK: @my_struct_global2 = external global %struct.__tgt_kernel_arguments
3433
// CHECK: @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
3534
// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
3635

@@ -43,6 +42,7 @@ fn main() {
4342
// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
4443
// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
4544
// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
45+
// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
4646
// CHECK-NEXT: %x.addr = alloca ptr, align 8
4747
// CHECK-NEXT: store ptr %x, ptr %x.addr, align 8
4848
// CHECK-NEXT: %1 = load ptr, ptr %x.addr, align 8
@@ -71,6 +71,9 @@ fn main() {
7171
// CHECK: ret void
7272
// CHECK-NEXT: }
7373

74+
// CHECK: Function Attrs: nounwind
75+
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
76+
7477
#[unsafe(no_mangle)]
7578
#[inline(never)]
7679
pub fn kernel_1(x: &mut [f32; 256]) {

0 commit comments

Comments
 (0)