@@ -27,9 +27,25 @@ pub(crate) fn handle_gpu_code<'ll>(
27
27
}
28
28
}
29
29
gen_call_handling ( & cx, & kernels, & o_types) ;
30
+ generate_launcher ( & cx) ;
30
31
crate :: builder:: gpu_wrapper:: gen_image_wrapper_module ( & cgcx) ;
31
32
}
32
33
34
+ // ; Function Attrs: nounwind
35
+ // declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #2
36
+ fn generate_launcher < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm:: Value {
37
+ let tptr = cx. type_ptr ( ) ;
38
+ let ti64 = cx. type_i64 ( ) ;
39
+ let ti32 = cx. type_i32 ( ) ;
40
+ let args = vec ! [ tptr, ti64, ti32, ti32, tptr, tptr] ;
41
+ let tgt_fn_ty = cx. type_func ( & args, ti32) ;
42
+ let name = "__tgt_target_kernel" ;
43
+ let tgt_decl = declare_offload_fn ( & cx, name, tgt_fn_ty) ;
44
+ let nounwind = llvm:: AttributeKind :: NoUnwind . create_attr ( cx. llcx ) ;
45
+ attributes:: apply_to_llfn ( tgt_decl, Function , & [ nounwind] ) ;
46
+ tgt_decl
47
+ }
48
+
33
49
// What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper:
34
50
// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
35
51
// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@@ -83,7 +99,7 @@ pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Ty
83
99
offload_entry_ty
84
100
}
85
101
86
- fn gen_tgt_kernel_global < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) {
102
+ fn gen_tgt_kernel_global < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm :: Type {
87
103
let kernel_arguments_ty = cx. type_named_struct ( "struct.__tgt_kernel_arguments" ) ;
88
104
let tptr = cx. type_ptr ( ) ;
89
105
let ti64 = cx. type_i64 ( ) ;
@@ -118,9 +134,10 @@ fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
118
134
vec ! [ ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32] ;
119
135
120
136
cx. set_struct_body ( kernel_arguments_ty, & kernel_elements, false ) ;
137
+ kernel_arguments_ty
121
138
// For now we don't handle kernels, so for now we just add a global dummy
122
139
// to make sure that the __tgt_offload_entry is defined and handled correctly.
123
- cx. declare_global ( "my_struct_global2" , kernel_arguments_ty) ;
140
+ // cx.declare_global("my_struct_global2", kernel_arguments_ty);
124
141
}
125
142
126
143
fn gen_tgt_data_mappers < ' ll > (
@@ -295,7 +312,7 @@ fn gen_call_handling<'ll>(
295
312
let tgt_bin_desc = cx. type_named_struct ( "struct.__tgt_bin_desc" ) ;
296
313
cx. set_struct_body ( tgt_bin_desc, & tgt_bin_desc_ty, false ) ;
297
314
298
- gen_tgt_kernel_global ( & cx) ;
315
+ let tgt_kernel_decl = gen_tgt_kernel_global ( & cx) ;
299
316
let ( begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers ( & cx) ;
300
317
301
318
let main_fn = cx. get_function ( "main" ) ;
@@ -329,6 +346,9 @@ fn gen_call_handling<'ll>(
329
346
// These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
330
347
let ty2 = cx. type_array ( cx. type_i64 ( ) , num_args) ;
331
348
let a4 = builder. direct_alloca ( ty2, Align :: EIGHT , ".offload_sizes" ) ;
349
+
350
+ let a5 = builder. direct_alloca ( tgt_kernel_decl, Align :: EIGHT , "kernel_args" ) ;
351
+ //%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
332
352
// Now we allocate once per function param, a copy to be passed to one of our maps.
333
353
let mut vals = vec ! [ ] ;
334
354
let mut geps = vec ! [ ] ;
0 commit comments