@@ -2,11 +2,12 @@ use std::ffi::CString;
22
33use llvm:: Linkage :: * ;
44use rustc_abi:: Align ;
5+ use rustc_codegen_ssa:: MemFlags ;
56use rustc_codegen_ssa:: common:: TypeKind ;
67use rustc_codegen_ssa:: mir:: operand:: { OperandRef , OperandValue } ;
78use rustc_codegen_ssa:: traits:: { BaseTypeCodegenMethods , BuilderMethods } ;
89use rustc_middle:: bug;
9- use rustc_middle:: ty:: offload_meta:: OffloadMetadata ;
10+ use rustc_middle:: ty:: offload_meta:: { OffloadMetadata , OffloadSize } ;
1011
1112use crate :: builder:: Builder ;
1213use crate :: common:: CodegenCx ;
@@ -427,7 +428,15 @@ pub(crate) fn gen_define_handling<'ll>(
427428 let ( sizes, transfer) : ( Vec < _ > , Vec < _ > ) =
428429 metadata. iter ( ) . map ( |m| ( m. payload_size , m. mode . bits ( ) | 0x20 ) ) . unzip ( ) ;
429430
430- let offload_sizes = add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & sizes) ;
431+ let actual_sizes = sizes
432+ . iter ( )
433+ . map ( |s| match s {
434+ OffloadSize :: Static ( sz) => * sz,
435+ OffloadSize :: Dynamic => 0 ,
436+ } )
437+ . collect :: < Vec < _ > > ( ) ;
438+ let offload_sizes =
439+ add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & actual_sizes) ;
431440 // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2),
432441 // or both to and from the gpu (=3). Other values shouldn't affect us for now.
433442 // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten
@@ -470,10 +479,6 @@ pub(crate) fn gen_define_handling<'ll>(
470479 cx. add_compiler_used_global ( offload_entry) ;
471480
472481 let result = OffloadKernelGlobals { offload_sizes, memtransfer_types, region_id } ;
473-
474- // FIXME(Sa4dUs): use this global for constant offload sizes
475- cx. add_compiler_used_global ( result. offload_sizes ) ;
476-
477482 cx. offload_kernel_cache . borrow_mut ( ) . insert ( symbol, result) ;
478483
479484 result
@@ -507,6 +512,15 @@ pub(crate) fn scalar_width<'ll>(cx: &'ll SimpleCx<'_>, ty: &'ll Type) -> u64 {
507512 }
508513}
509514
515+ fn get_runtime_size < ' ll , ' tcx > (
516+ _cx : & CodegenCx < ' ll , ' tcx > ,
517+ _val : & ' ll Value ,
518+ _meta : & OffloadMetadata ,
519+ ) -> & ' ll Value {
520+ // FIXME(Sa4dUs): handle dynamic-size data (e.g. slices)
521+ bug ! ( "offload does not support dynamic sizes yet" ) ;
522+ }
523+
510524// For each kernel *call*, we now use some of our previously declared globals to move data to and from
511525// the gpu. For now, we only handle the data transfer part of it.
512526// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu.
@@ -535,10 +549,12 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
535549 offload_dims : & OffloadKernelDims < ' ll > ,
536550) {
537551 let cx = builder. cx ;
538- let OffloadKernelGlobals { memtransfer_types , region_id , .. } = offload_data;
552+ let OffloadKernelGlobals { offload_sizes , memtransfer_types , region_id } = offload_data;
539553 let OffloadKernelDims { num_workgroups, threads_per_block, workgroup_dims, thread_dims } =
540554 offload_dims;
541555
556+ let has_dynamic = metadata. iter ( ) . any ( |m| matches ! ( m. payload_size, OffloadSize :: Dynamic ) ) ;
557+
542558 let tgt_decl = offload_globals. launcher_fn ;
543559 let tgt_target_kernel_ty = offload_globals. launcher_ty ;
544560
@@ -562,7 +578,24 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
562578 let a2 = builder. direct_alloca ( ty, Align :: EIGHT , ".offload_ptrs" ) ;
563579 // These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
564580 let ty2 = cx. type_array ( cx. type_i64 ( ) , num_args) ;
565- let a4 = builder. direct_alloca ( ty2, Align :: EIGHT , ".offload_sizes" ) ;
581+
582+ let a4 = if has_dynamic {
583+ let alloc = builder. direct_alloca ( ty2, Align :: EIGHT , ".offload_sizes" ) ;
584+
585+ builder. memcpy (
586+ alloc,
587+ Align :: EIGHT ,
588+ offload_sizes,
589+ Align :: EIGHT ,
590+ cx. get_const_i64 ( 8 * args. len ( ) as u64 ) ,
591+ MemFlags :: empty ( ) ,
592+ None ,
593+ ) ;
594+
595+ alloc
596+ } else {
597+ offload_sizes
598+ } ;
566599
567600 //%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
568601 let a5 = builder. direct_alloca ( tgt_kernel_decl, Align :: EIGHT , "kernel_args" ) ;
@@ -620,9 +653,12 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
620653 builder. store ( vals[ i as usize ] , gep1, Align :: EIGHT ) ;
621654 let gep2 = builder. inbounds_gep ( ty, a2, & [ i32_0, idx] ) ;
622655 builder. store ( geps[ i as usize ] , gep2, Align :: EIGHT ) ;
623- let gep3 = builder. inbounds_gep ( ty2, a4, & [ i32_0, idx] ) ;
624- // FIXME(offload): write an offload frontend and handle arbitrary types.
625- builder. store ( cx. get_const_i64 ( metadata[ i as usize ] . payload_size ) , gep3, Align :: EIGHT ) ;
656+
657+ if matches ! ( metadata[ i as usize ] . payload_size, OffloadSize :: Dynamic ) {
658+ let gep3 = builder. inbounds_gep ( ty2, a4, & [ i32_0, idx] ) ;
659+ let size_val = get_runtime_size ( cx, args[ i as usize ] , & metadata[ i as usize ] ) ;
660+ builder. store ( size_val, gep3, Align :: EIGHT ) ;
661+ }
626662 }
627663
628664 // For now we have a very simplistic indexing scheme into our
0 commit comments