@@ -225,6 +225,176 @@ pub unsafe fn memcpy_dtoh(
225225 Ok ( ( ) )
226226}
227227
228+ /// Similar to `cudaMemcpy2D` with `HostToDevice` copy type.
229+ ///
230+ /// `dpitch`/`spitch` is bytes between the start of two rows.
231+ /// `width` is the number of *elements* (not bytes) in a row.
232+ /// `height` is the total number of rows (not bytes).
233+ ///
234+ /// # Examples
235+ ///
236+ /// ```
237+ /// # let _context = cust::quick_init().unwrap();
238+ /// # fn foo() -> Result<(), cust::error::CudaError> {
239+ /// use cust::memory::*;
240+ /// unsafe {
241+ /// // Allocate space for a 3x3 matrix of f32s
242+ /// let (device_buffer, pitch) = cuda_malloc_pitched::<f32>(3, 3)?;
243+ ///
244+ /// let src_array: [f32; 9] = [
245+ /// 1.0, 2.0, 3.0,
246+ /// 4.0, 5.0, 6.0,
247+ /// 7.0, 8.0, 9.0];
248+ ///
249+ /// memcpy_2d_htod(
250+ /// device_buffer,
251+ /// pitch,
252+ /// src_array.as_slice().as_ptr(),
253+ /// 3*std::mem::size_of::<f32>(),
254+ /// 3,
255+ /// 3
256+ /// )?;
257+ ///
258+ /// let mut dst_array = [0.0f32; 9];
259+ ///
260+ /// memcpy_2d_dtoh(
261+ /// dst_array.as_mut_slice().as_mut_ptr(),
262+ /// 3*std::mem::size_of::<f32>(),
263+ /// device_buffer,
264+ /// pitch,
265+ /// 3,
266+ /// 3
267+ /// )?;
268+ ///
269+ /// assert_eq!(dst_array, src_array);
270+ /// cuda_free(device_buffer)?;
271+ /// }
272+ /// # Ok(())
273+ /// # }
274+ /// # foo().unwrap();
275+ /// ```
276+ #[ allow( clippy:: missing_safety_doc) ]
277+ pub unsafe fn memcpy_2d_htod < T : DeviceCopy > (
278+ dst : DevicePointer < T > ,
279+ dpitch : usize ,
280+ src : * const T ,
281+ spitch : usize ,
282+ width : usize ,
283+ height : usize ,
284+ ) -> CudaResult < ( ) > {
285+ use cust_raw:: CUmemorytype ;
286+
287+ let width_in_bytes = width. checked_mul ( std:: mem:: size_of :: < T > ( ) )
288+ . ok_or ( CudaError :: InvalidMemoryAllocation ) ?;
289+
290+ let pcopy = cust_raw:: CUDA_MEMCPY2D_st {
291+ srcXInBytes : 0 ,
292+ srcY : 0 ,
293+ srcMemoryType : CUmemorytype :: CU_MEMORYTYPE_HOST ,
294+ srcHost : src as * const c_void ,
295+ srcDevice : 0 , // Ignored
296+ srcArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
297+ srcPitch : spitch,
298+ dstXInBytes : 0 ,
299+ dstY : 0 ,
300+ dstMemoryType : CUmemorytype :: CU_MEMORYTYPE_DEVICE ,
301+ dstHost : std:: ptr:: null_mut :: < c_void > ( ) , // Ignored
302+ dstDevice : dst. as_raw ( ) ,
303+ dstArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
304+ dstPitch : dpitch,
305+ WidthInBytes : width_in_bytes,
306+ Height : height,
307+ } ;
308+
309+ crate :: sys:: cuMemcpy2D_v2 ( & pcopy) . to_result ( ) ?;
310+ Ok ( ( ) )
311+ }
312+
313+ /// Similar to `cudaMemcpy2D` with `DeviceToHost` copy type.
314+ ///
315+ /// `dpitch`/`spitch` is bytes between the start of two rows.
316+ /// `width` is the number of *elements* (not bytes) in a row.
317+ /// `height` is the total number of rows (not bytes).
318+ ///
319+ /// # Examples
320+ ///
321+ /// ```
322+ /// # let _context = cust::quick_init().unwrap();
323+ /// # fn foo() -> Result<(), cust::error::CudaError> {
324+ /// use cust::memory::*;
325+ /// unsafe {
326+ /// // Allocate space for a 3x3 matrix of f32s
327+ /// let (device_buffer, pitch) = cuda_malloc_pitched::<f32>(3, 3)?;
328+ ///
329+ /// let src_array: [f32; 9] = [
330+ /// 1.0, 2.0, 3.0,
331+ /// 4.0, 5.0, 6.0,
332+ /// 7.0, 8.0, 9.0];
333+ ///
334+ /// memcpy_2d_htod(
335+ /// device_buffer,
336+ /// pitch,
337+ /// src_array.as_slice().as_ptr(),
338+ /// 3*std::mem::size_of::<f32>(),
339+ /// 3,
340+ /// 3
341+ /// )?;
342+ ///
343+ /// let mut dst_array = [0.0f32; 9];
344+ ///
345+ /// memcpy_2d_dtoh(
346+ /// dst_array.as_mut_slice().as_mut_ptr(),
347+ /// 3*std::mem::size_of::<f32>(),
348+ /// device_buffer,
349+ /// pitch,
350+ /// 3,
351+ /// 3
352+ /// )?;
353+ ///
354+ /// assert_eq!(dst_array, src_array);
355+ /// cuda_free(device_buffer)?;
356+ /// }
357+ /// # Ok(())
358+ /// # }
359+ /// # foo().unwrap();
360+ /// ```
361+ #[ allow( clippy:: missing_safety_doc) ]
362+ pub unsafe fn memcpy_2d_dtoh < T : DeviceCopy > (
363+ dst : * mut T ,
364+ dpitch : usize ,
365+ src : DevicePointer < T > ,
366+ spitch : usize ,
367+ width : usize ,
368+ height : usize ,
369+ ) -> CudaResult < ( ) > {
370+ use cust_raw:: CUmemorytype ;
371+
372+ let width_in_bytes = width. checked_mul ( std:: mem:: size_of :: < T > ( ) )
373+ . ok_or ( CudaError :: InvalidMemoryAllocation ) ?;
374+
375+ let pcopy = cust_raw:: CUDA_MEMCPY2D_st {
376+ srcXInBytes : 0 ,
377+ srcY : 0 ,
378+ srcMemoryType : CUmemorytype :: CU_MEMORYTYPE_DEVICE ,
379+ srcHost : std:: ptr:: null_mut :: < c_void > ( ) , // Ignored
380+ srcDevice : src. as_raw ( ) ,
381+ srcArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
382+ srcPitch : spitch,
383+ dstXInBytes : 0 ,
384+ dstY : 0 ,
385+ dstMemoryType : CUmemorytype :: CU_MEMORYTYPE_HOST ,
386+ dstHost : dst as * mut c_void ,
387+ dstDevice : 0 , // Ignored
388+ dstArray : std:: ptr:: null_mut :: < cust_raw:: CUarray_st > ( ) , // Ignored
389+ dstPitch : dpitch,
390+ WidthInBytes : width_in_bytes,
391+ Height : height,
392+ } ;
393+
394+ crate :: sys:: cuMemcpy2D_v2 ( & pcopy) . to_result ( ) ?;
395+ Ok ( ( ) )
396+ }
397+
228398/// Get the current free and total memory.
229399///
230400/// Returns in `.1` the total amount of memory available to the current context.
0 commit comments