Skip to content

Commit cc13fc3

Browse files
committed
generate more geps and stores for kernel_args
1 parent cb52508 commit cc13fc3

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Ty
9999
offload_entry_ty
100100
}
101101

102-
fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type {
102+
fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) -> (&'ll llvm::Type, Vec<&'ll llvm::Type>) {
103103
let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments");
104104
let tptr = cx.type_ptr();
105105
let ti64 = cx.type_i64();
@@ -134,7 +134,7 @@ fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type {
134134
vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32];
135135

136136
cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false);
137-
kernel_arguments_ty
137+
(kernel_arguments_ty, kernel_elements)
138138
// We don't handle kernels yet, so for now we just add a global dummy
139139
// to make sure that the __tgt_offload_entry is defined and handled correctly.
140140
//cx.declare_global("my_struct_global2", kernel_arguments_ty);
@@ -312,7 +312,7 @@ fn gen_call_handling<'ll>(
312312
let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
313313
cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false);
314314

315-
let tgt_kernel_decl = gen_tgt_kernel_global(&cx);
315+
let (tgt_kernel_decl, tgt_kernel_types) = gen_tgt_kernel_global(&cx);
316316
let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx);
317317

318318
let main_fn = cx.get_function("main");
@@ -470,22 +470,25 @@ fn gen_call_handling<'ll>(
470470
//store i32 0, ptr %40, align 4
471471
// FIXME(offload): launch kernels
472472
let mut values = vec![];
473-
values.push(cx.get_const_i32(3));
474-
values.push(cx.get_const_i32(3));
475-
values.push(geps.0);
476-
values.push(geps.1);
477-
values.push(geps.2);
478-
values.push(o_types[0]);
479-
values.push(cx.const_null(cx.type_ptr()));
480-
values.push(cx.const_null(cx.type_ptr()));
481-
values.push(cx.get_const_i64(0));
482-
values.push(cx.get_const_i64(0));
483-
values.push();
484-
values.push();
485-
values.push(cx.get_const_i32(0));
486-
for (value, i) in values.enumerate() {
487-
let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, cx.get_const_i32(i)]);
488-
builder.store(p, alloca, Align::EIGHT);
473+
values.push((4, cx.get_const_i32(3)));
474+
values.push((4, cx.get_const_i32(3)));
475+
values.push((8, geps.0));
476+
values.push((8, geps.1));
477+
values.push((8, geps.2));
478+
values.push((8, o_types[0]));
479+
values.push((8, cx.const_null(cx.type_ptr())));
480+
values.push((8, cx.const_null(cx.type_ptr())));
481+
values.push((8, cx.get_const_i64(0)));
482+
values.push((8, cx.get_const_i64(0)));
483+
let ti32 = cx.type_i32();
484+
let ci32_0 = cx.get_const_i32(0);
485+
values.push((8, cx.const_array(ti32, &vec![cx.get_const_i32(2097152), ci32_0, ci32_0])));
486+
values.push((8, cx.const_array(ti32, &vec![cx.get_const_i32(256), ci32_0, ci32_0])));
487+
values.push((4, cx.get_const_i32(0)));
488+
489+
for (i, value) in values.iter().enumerate() {
490+
let ptr = builder.inbounds_gep(tgt_kernel_decl, a5, &[i32_0, cx.get_const_i32(i as u64)]);
491+
builder.store(value.1, ptr, Align::from_bytes(value.0).unwrap());
489492
}
490493

491494
// Step 4)

0 commit comments

Comments
 (0)