@@ -26,8 +26,8 @@ pub(crate) fn handle_gpu_code<'ll>(
            kernels.push(kernel);
        }
    }
-    gen_call_handling(&cx, &kernels, &o_types);
    generate_launcher(&cx);
+    gen_call_handling(&cx, &kernels, &o_types);
    crate::builder::gpu_wrapper::gen_image_wrapper_module(&cgcx);
}
@@ -347,8 +347,9 @@ fn gen_call_handling<'ll>(
    let ty2 = cx.type_array(cx.type_i64(), num_args);
    let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");

-    let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
    //%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
+    let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
+
    // Now we allocate once per function param, a copy to be passed to one of our maps.
    let mut vals = vec![];
    let mut geps = vec![];
@@ -441,7 +442,51 @@ fn gen_call_handling<'ll>(

    // Step 3)
    // Here we will add code for the actual kernel launches in a follow-up PR.
+    //%28 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0
+    //store i32 3, ptr %28, align 4
+    //%29 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1
+    //store i32 3, ptr %29, align 4
+    //%30 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2
+    //store ptr %26, ptr %30, align 8
+    //%31 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3
+    //store ptr %27, ptr %31, align 8
+    //%32 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4
+    //store ptr @.offload_sizes, ptr %32, align 8
+    //%33 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5
+    //store ptr @.offload_maptypes, ptr %33, align 8
+    //%34 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6
+    //store ptr null, ptr %34, align 8
+    //%35 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7
+    //store ptr null, ptr %35, align 8
+    //%36 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8
+    //store i64 0, ptr %36, align 8
+    //%37 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9
+    //store i64 0, ptr %37, align 8
+    //%38 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10
+    //store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %38, align 4
+    //%39 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11
+    //store [3 x i32] [i32 256, i32 0, i32 0], ptr %39, align 4
+    //%40 = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12
+    //store i32 0, ptr %40, align 4
    // FIXME(offload): launch kernels
+    let mut values = vec![];
+    values.push(cx.get_const_i32(3));
+    values.push(cx.get_const_i32(3));
+    values.push(geps.0);
+    values.push(geps.1);
+    values.push(geps.2);
+    values.push(o_types[0]);
+    values.push(cx.const_null(cx.type_ptr()));
+    values.push(cx.const_null(cx.type_ptr()));
+    values.push(cx.get_const_i64(0));
+    values.push(cx.get_const_i64(0));
+    // NumTeams and ThreadLimit, the two [3 x i32] fields shown in the IR above.
+    // Assumption: a `const_array` helper is available on `cx` to build these constants.
+    let ty_i32_3 = cx.type_array(cx.type_i32(), 3);
+    values.push(cx.const_array(ty_i32_3, &[cx.get_const_i32(2097152), i32_0, i32_0]));
+    values.push(cx.const_array(ty_i32_3, &[cx.get_const_i32(256), i32_0, i32_0]));
+    values.push(cx.get_const_i32(0));
+    // Store each value into the corresponding field of the kernel_args struct (a5).
+    for (i, value) in values.into_iter().enumerate() {
+        let gep = builder.inbounds_gep(tgt_kernel_decl, a5, &[i32_0, cx.get_const_i32(i as u64)]);
+        builder.store(value, gep, Align::EIGHT);
+    }

    // Step 4)
    unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) };
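
For orientation, the store loop added above writes `values[i]` into field `i` of `%struct.__tgt_kernel_arguments`. Below is a minimal sketch of that field order, matching the clang-generated IR in the comments; the field names follow libomptarget's `KernelArgsTy` and are included here only as an assumption for readability, not as part of this change:

```rust
// Field index -> meaning for %struct.__tgt_kernel_arguments, as stored by the loop above.
// Names are borrowed from libomptarget's KernelArgsTy; they are an assumption, not rustc code.
#[allow(dead_code)]
#[repr(u32)]
enum KernelArgsField {
    Version = 0,       // i32: 3
    NumArgs = 1,       // i32: number of offloaded arguments (3 in the IR above)
    ArgBasePtrs = 2,   // ptr: geps.0
    ArgPtrs = 3,       // ptr: geps.1
    ArgSizes = 4,      // ptr: geps.2
    ArgTypes = 5,      // ptr: o_types[0]
    ArgNames = 6,      // ptr: null
    ArgMappers = 7,    // ptr: null
    Tripcount = 8,     // i64: 0
    Flags = 9,         // i64: 0 (flags bitfield)
    NumTeams = 10,     // [3 x i32]: [2097152, 0, 0]
    ThreadLimit = 11,  // [3 x i32]: [256, 0, 0]
    DynCGroupMem = 12, // i32: 0
}
```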