From 4ac21c71012c8d6d80f20c4259e8533d4d708e13 Mon Sep 17 00:00:00 2001 From: Chetan Vardhan Date: Mon, 13 Jan 2025 17:22:48 +0900 Subject: [PATCH] apply runic formatting --- examples/hands_on_opencl/ex04/vadd_chain.jl | 42 +- examples/hands_on_opencl/ex05/vadd_abc.jl | 18 +- examples/hands_on_opencl/ex06/helper.jl | 14 +- examples/hands_on_opencl/ex06/matmul.jl | 27 +- examples/hands_on_opencl/ex07/helper.jl | 14 +- examples/hands_on_opencl/ex07/matmul.jl | 65 +- examples/hands_on_opencl/ex08/helper.jl | 16 +- examples/hands_on_opencl/ex08/matmul.jl | 109 +- examples/hands_on_opencl/ex09/pi_ocl.jl | 16 +- examples/hands_on_opencl/exA/pi_vocl.jl | 26 +- examples/performance.jl | 43 +- examples/vadd.jl | 20 +- examples/vadd_native.jl | 4 +- lib/cl/api.jl | 17 +- lib/cl/cmdqueue.jl | 30 +- lib/cl/context.jl | 63 +- lib/cl/device.jl | 80 +- lib/cl/error.jl | 102 +- lib/cl/event.jl | 86 +- lib/cl/intelfns.jl | 42 +- lib/cl/kernel.jl | 107 +- lib/cl/libopencl.jl | 3188 +++++++++++-------- lib/cl/memory.jl | 162 +- lib/cl/platform.jl | 8 +- lib/cl/pointer.jl | 43 +- lib/cl/program.jl | 22 +- lib/cl/state.jl | 17 +- lib/intrinsics/src/SPIRVIntrinsics.jl | 2 +- lib/intrinsics/src/atomic.jl | 167 +- lib/intrinsics/src/integer.jl | 42 +- lib/intrinsics/src/math.jl | 178 +- lib/intrinsics/src/memory.jl | 8 +- lib/intrinsics/src/pointer.jl | 16 +- lib/intrinsics/src/printf.jl | 60 +- lib/intrinsics/src/synchronization.jl | 10 +- lib/intrinsics/src/utils.jl | 36 +- lib/intrinsics/src/work_item.jl | 26 +- res/opencl_prologue.jl | 8 +- res/wrap.jl | 11 +- src/OpenCLKernels.jl | 26 +- src/array.jl | 537 ++-- src/broadcast.jl | 18 +- src/compiler/compilation.jl | 22 +- src/compiler/execution.jl | 63 +- src/compiler/reflection.jl | 13 +- src/device/array.jl | 110 +- src/device/quirks.jl | 2 +- src/gpuarrays.jl | 4 +- src/mapreduce.jl | 30 +- src/memory.jl | 107 +- src/pool.jl | 21 +- src/random.jl | 3 +- src/util.jl | 11 +- test/array.jl | 24 +- test/behaviour.jl | 148 +- test/buffer.jl | 46 +- test/cmdqueue.jl | 8 +- test/context.jl | 28 +- test/device.jl | 76 +- test/event.jl | 94 +- test/execution.jl | 188 +- test/kernel.jl | 52 +- test/kernelabstractions.jl | 10 +- test/memory.jl | 2 +- test/platform.jl | 4 +- test/program.jl | 18 +- test/runtests.jl | 168 +- test/setup.jl | 52 +- 68 files changed, 3856 insertions(+), 2974 deletions(-) diff --git a/examples/hands_on_opencl/ex04/vadd_chain.jl b/examples/hands_on_opencl/ex04/vadd_chain.jl index b05d0647..aa654c54 100644 --- a/examples/hands_on_opencl/ex04/vadd_chain.jl +++ b/examples/hands_on_opencl/ex04/vadd_chain.jl @@ -13,7 +13,7 @@ using OpenCL # tolerance used in floating point comparisons -TOL = 1e-3 +TOL = 1.0e-3 # length of vectors a, b, c LENGTH = 1024 @@ -41,7 +41,7 @@ __kernel void vadd( # create a compute context # create the compute program and build it -program = cl.Program(source=kernelsource) |> cl.build! +program = cl.Program(source = kernelsource) |> cl.build! #create a, b, e, and g vectors and fill with random float values #create empty vectors for c, d, and f @@ -62,14 +62,14 @@ h_g = rand(Float32, LENGTH) # {:use (use host buffer), :alloc (alloc pinned memory), :copy (default)} # Create the input (a, b, e, g) arrays in device memory and copy data from host -d_a = CLArray(h_a; access=:r) -d_b = CLArray(h_b; access=:r) -d_e = CLArray(h_e; access=:r) -d_g = CLArray(h_g; access=:r) +d_a = CLArray(h_a; access = :r) +d_b = CLArray(h_b; access = :r) +d_e = CLArray(h_e; access = :r) +d_g = CLArray(h_g; access = :r) # Create the output (c, d, f) array in device memory -d_c = CLArray{Float32}(undef, LENGTH; access=:w) -d_d = CLArray{Float32}(undef, LENGTH; access=:w) -d_f = CLArray{Float32}(undef, LENGTH; access=:w) +d_c = CLArray{Float32}(undef, LENGTH; access = :w) +d_d = CLArray{Float32}(undef, LENGTH; access = :w) +d_f = CLArray{Float32}(undef, LENGTH; access = :w) # create the kernel vadd = cl.Kernel(program, "vadd") @@ -81,12 +81,18 @@ vadd = cl.Kernel(program, "vadd") # here we call the kernel with work size set to the number of elements and no local # work size. This enables the opencl runtime to optimize the local size for simple # kernels -clcall(vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, - d_a, d_b, d_c, LENGTH; global_size=size(h_a)) -clcall(vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, - d_e, d_c, d_d, LENGTH; global_size=size(h_e)) -clcall(vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, - d_g, d_d, d_f, LENGTH; global_size=size(h_g)) +clcall( + vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, + d_a, d_b, d_c, LENGTH; global_size = size(h_a) +) +clcall( + vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, + d_e, d_c, d_d, LENGTH; global_size = size(h_e) +) +clcall( + vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, + d_g, d_d, d_f, LENGTH; global_size = size(h_g) +) # copy back the results from the compute device # copy!(queue, dst, src) follows same interface as julia's built in copy! @@ -100,8 +106,10 @@ for i in 1:LENGTH if tmp^2 < TOL^2 global correct += 1 else - println("tmp $tmp h_a $(h_a[i]) h_b $(h_b[i]) ", - "h_e $(h_e[i]) h_g $(h_g[i]) h_f $(h_f[i])") + println( + "tmp $tmp h_a $(h_a[i]) h_b $(h_b[i]) ", + "h_e $(h_e[i]) h_g $(h_g[i]) h_f $(h_f[i])" + ) end end diff --git a/examples/hands_on_opencl/ex05/vadd_abc.jl b/examples/hands_on_opencl/ex05/vadd_abc.jl index 7248804c..47e03e88 100644 --- a/examples/hands_on_opencl/ex05/vadd_abc.jl +++ b/examples/hands_on_opencl/ex05/vadd_abc.jl @@ -13,7 +13,7 @@ using OpenCL # tolerance used in floating point comparisons -TOL = 1e-3 +TOL = 1.0e-3 # length of vectors a, b, c LENGTH = 1024 @@ -39,7 +39,7 @@ __kernel void vadd( }" # create the compute program and build it -program = cl.Program(source=kernelsource) |> cl.build! +program = cl.Program(source = kernelsource) |> cl.build! # create a, b and c vectors and fill with random float values # (the result array will be created when reading back from the device) @@ -47,19 +47,21 @@ h_a = rand(Float32, LENGTH) h_b = rand(Float32, LENGTH) h_c = rand(Float32, LENGTH) -d_a = CLArray(h_a; access=:r) -d_b = CLArray(h_b; access=:r) -d_c = CLArray(h_c; access=:r) +d_a = CLArray(h_a; access = :r) +d_b = CLArray(h_b; access = :r) +d_c = CLArray(h_c; access = :r) # create the output (r) buffer in device memory -d_r = CLArray{Float32}(undef, LENGTH; access=:w) +d_r = CLArray{Float32}(undef, LENGTH; access = :w) # create the kernel vadd = cl.Kernel(program, "vadd") # execute the kernel over the entire range of the input -clcall(vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, - d_a, d_b, d_c, d_r, UInt32(LENGTH); global_size=size(h_a)) +clcall( + vadd, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cuint}, + d_a, d_b, d_c, d_r, UInt32(LENGTH); global_size = size(h_a) +) # read the results back from the compute device h_r = Array(d_r) diff --git a/examples/hands_on_opencl/ex06/helper.jl b/examples/hands_on_opencl/ex06/helper.jl index e847f878..91d66d7c 100644 --- a/examples/hands_on_opencl/ex06/helper.jl +++ b/examples/hands_on_opencl/ex06/helper.jl @@ -1,22 +1,22 @@ import Base: error -function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where T - cval = Float32(Pdim * AVAL * BVAL) - errsq = 0f0 +function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where {T} + cval = Float32(Pdim * AVAL * BVAL) + errsq = 0.0f0 for i in 1:Ndim for j in 1:Mdim - err = C[(i-1)*Ndim+j] - cval + err = C[(i - 1) * Ndim + j] - cval errsq += err^2 end end return errsq end -function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where T - mflops = 2.0 * Mdim * Ndim * Pdim/(1000000.0* run_time) +function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where {T} + mflops = 2.0 * Mdim * Ndim * Pdim / (1000000.0 * run_time) println("$run_time seconds at $mflops MFLOPS") errsq = error(Mdim, Ndim, Pdim, C) - if isnan(errsq) || errsq > TOL + return if isnan(errsq) || errsq > TOL println("Errors in multiplication: $errsq") end end diff --git a/examples/hands_on_opencl/ex06/matmul.jl b/examples/hands_on_opencl/ex06/matmul.jl index 9d254213..da913837 100644 --- a/examples/hands_on_opencl/ex06/matmul.jl +++ b/examples/hands_on_opencl/ex06/matmul.jl @@ -78,17 +78,20 @@ h_B = fill(Float32(BVAL), sizeB) h_C = Vector{Float32}(undef, sizeC) # %20 improvment using @inbounds -function seq_mat_mul_sdot(Mdim::Int, Ndim::Int, Pdim::Int, - A::Array{T}, B::Array{T}, C::Array{T}) where T +function seq_mat_mul_sdot( + Mdim::Int, Ndim::Int, Pdim::Int, + A::Array{T}, B::Array{T}, C::Array{T} + ) where {T} for i in 1:Ndim for j in 1:Mdim tmp = zero(Float32) for k in 1:Pdim - @inbounds tmp += A[(i-1)*Ndim+k] * B[(k-1)*Pdim+j] + @inbounds tmp += A[(i - 1) * Ndim + k] * B[(k - 1) * Pdim + j] end - @inbounds C[(i-1)*Ndim+j] = tmp + @inbounds C[(i - 1) * Ndim + j] = tmp end end + return end @info("=== Julia, matix mult (dot prod), order $ORDER ===") @@ -105,11 +108,11 @@ for i in 1:COUNT end # create OpenCL arrays -d_a = CLArray(h_A; access=:r) -d_b = CLArray(h_B; access=:r) -d_c = CLArray{Float32}(undef, length(h_C); access=:w) +d_a = CLArray(h_A; access = :r) +d_b = CLArray(h_B; access = :r) +d_c = CLArray{Float32}(undef, length(h_C); access = :w) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") @info("=== OpenCL, matrix mult, C(i, j) per work item, order $Ndim ====") @@ -122,12 +125,14 @@ for i in 1:COUNT # You can enable profiling events on the queue # by calling the constructor with the :profile flag cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size + ) wait(evt) # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 + run_time = evt.profile_duration / 1.0e9 cl.copy!(h_C, d_c) results(Mdim, Ndim, Pdim, h_C, run_time) end diff --git a/examples/hands_on_opencl/ex07/helper.jl b/examples/hands_on_opencl/ex07/helper.jl index e847f878..91d66d7c 100644 --- a/examples/hands_on_opencl/ex07/helper.jl +++ b/examples/hands_on_opencl/ex07/helper.jl @@ -1,22 +1,22 @@ import Base: error -function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where T - cval = Float32(Pdim * AVAL * BVAL) - errsq = 0f0 +function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where {T} + cval = Float32(Pdim * AVAL * BVAL) + errsq = 0.0f0 for i in 1:Ndim for j in 1:Mdim - err = C[(i-1)*Ndim+j] - cval + err = C[(i - 1) * Ndim + j] - cval errsq += err^2 end end return errsq end -function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where T - mflops = 2.0 * Mdim * Ndim * Pdim/(1000000.0* run_time) +function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where {T} + mflops = 2.0 * Mdim * Ndim * Pdim / (1000000.0 * run_time) println("$run_time seconds at $mflops MFLOPS") errsq = error(Mdim, Ndim, Pdim, C) - if isnan(errsq) || errsq > TOL + return if isnan(errsq) || errsq > TOL println("Errors in multiplication: $errsq") end end diff --git a/examples/hands_on_opencl/ex07/matmul.jl b/examples/hands_on_opencl/ex07/matmul.jl index dadc05da..0abe7cf3 100644 --- a/examples/hands_on_opencl/ex07/matmul.jl +++ b/examples/hands_on_opencl/ex07/matmul.jl @@ -59,17 +59,20 @@ h_B = fill(Float32(BVAL), sizeB) h_C = Vector{Float32}(undef, sizeC) # %20 improvment using @inbounds -function seq_mat_mul_sdot(Mdim::Int, Ndim::Int, Pdim::Int, - A::Array{T}, B::Array{T}, C::Array{T}) where T +function seq_mat_mul_sdot( + Mdim::Int, Ndim::Int, Pdim::Int, + A::Array{T}, B::Array{T}, C::Array{T} + ) where {T} for i in 1:Ndim for j in 1:Mdim tmp = zero(Float32) for k in 1:Pdim - @inbounds tmp += A[(i-1)*Ndim+k] * B[(k-1)*Pdim+j] + @inbounds tmp += A[(i - 1) * Ndim + k] * B[(k - 1) * Pdim + j] end - @inbounds C[(i-1)*Ndim+j] = tmp + @inbounds C[(i - 1) * Ndim + j] = tmp end end + return end @info("=== Julia, matix mult (dot prod), order $ORDER ===") @@ -86,16 +89,16 @@ for i in 1:COUNT end # create OpenCL array -d_a = CLArray(h_A; access=:r) -d_b = CLArray(h_B; access=:r) -d_c = CLArray{Float32}(undef, length(h_C); access=:w) +d_a = CLArray(h_A; access = :r) +d_b = CLArray(h_B; access = :r) +d_c = CLArray{Float32}(undef, length(h_C); access = :w) #-------------------------------------------------------------------------------- # OpenCL matrix multiplication ... Naive #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_elem.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") @info("=== OpenCL, matrix mult, C(i, j) per work item, order $Ndim ====") @@ -103,12 +106,14 @@ mmul = cl.Kernel(prg, "mmul") for i in 1:COUNT fill!(h_C, 0.0) cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size=(Ndim, Mdim)) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size = (Ndim, Mdim) + ) wait(evt) # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 + run_time = evt.profile_duration / 1.0e9 cl.copy!(h_C, d_c) results(Mdim, Ndim, Pdim, h_C, run_time) end @@ -119,7 +124,7 @@ end #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_row.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") @info("=== OpenCL, matrix mult, C row per work item, order $Ndim ====") @@ -130,12 +135,14 @@ for i in 1:COUNT local_size = (div(ORDER, 16),) cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size, local_size) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size, local_size + ) wait(evt) # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 + run_time = evt.profile_duration / 1.0e9 cl.copy!(h_C, d_c) results(Mdim, Ndim, Pdim, h_C, run_time) end @@ -145,27 +152,29 @@ end # OpenCL matrix multiplication ... C row per work item, A row in pivate memory #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_row_priv.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") wk_size = cl.device().max_work_group_size if Ndim * (ORDER ÷ 16) >= wk_size @warn("Specified work_size $(Ndim * (ORDER ÷ 16)) is bigger than $wk_size") else -@info("=== OpenCL, matrix mult, C row, A row in priv mem, order $Ndim ====") + @info("=== OpenCL, matrix mult, C row, A row in priv mem, order $Ndim ====") -for i in 1:COUNT - fill!(h_C, 0.0) + for i in 1:COUNT + fill!(h_C, 0.0) - cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size=Ndim, local_size=ORDER) - wait(evt) + cl.queue!(:profile) do + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size = Ndim, local_size = ORDER + ) + wait(evt) - # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 - cl.copy!(h_C, d_c) - results(Mdim, Ndim, Pdim, h_C, run_time) + # profiling events are measured in ns + run_time = evt.profile_duration / 1.0e9 + cl.copy!(h_C, d_c) + results(Mdim, Ndim, Pdim, h_C, run_time) + end end end -end diff --git a/examples/hands_on_opencl/ex08/helper.jl b/examples/hands_on_opencl/ex08/helper.jl index 22c65061..c90717d4 100644 --- a/examples/hands_on_opencl/ex08/helper.jl +++ b/examples/hands_on_opencl/ex08/helper.jl @@ -1,11 +1,11 @@ -function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where T - cval = Float32(Pdim * AVAL * BVAL) - errsq = 0f0 +function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where {T} + cval = Float32(Pdim * AVAL * BVAL) + errsq = 0.0f0 for i in 1:Ndim for j in 1:Mdim - err = C[(i-1)*Ndim+j] - cval + err = C[(i - 1) * Ndim + j] - cval if isnan(err) - println((i,j)) + println((i, j)) end errsq += err^2 end @@ -13,11 +13,11 @@ function error(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}) where T return errsq end -function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where T - mflops = 2.0 * Mdim * Ndim * Pdim/(1000000.0* run_time) +function results(Mdim::Int, Ndim::Int, Pdim::Int, C::Array{T}, run_time) where {T} + mflops = 2.0 * Mdim * Ndim * Pdim / (1000000.0 * run_time) println("$run_time seconds at $mflops MFLOPS") errsq = error(Mdim, Ndim, Pdim, C) - if isnan(errsq) || errsq > TOL + return if isnan(errsq) || errsq > TOL println("Errors in multiplication: $errsq") end end diff --git a/examples/hands_on_opencl/ex08/matmul.jl b/examples/hands_on_opencl/ex08/matmul.jl index 48ecb77c..2a8ec973 100644 --- a/examples/hands_on_opencl/ex08/matmul.jl +++ b/examples/hands_on_opencl/ex08/matmul.jl @@ -59,17 +59,20 @@ h_B = fill(Float32(BVAL), sizeB) h_C = Vector{Float32}(undef, sizeC) # %20 improvment using @inbounds -function seq_mat_mul_sdot(Mdim::Int, Ndim::Int, Pdim::Int, - A::Array{T}, B::Array{T}, C::Array{T}) where T +function seq_mat_mul_sdot( + Mdim::Int, Ndim::Int, Pdim::Int, + A::Array{T}, B::Array{T}, C::Array{T} + ) where {T} for i in 1:Ndim for j in 1:Mdim tmp = zero(Float32) for k in 1:Pdim - @inbounds tmp += A[(i-1)*Ndim+k] * B[(k-1)*Pdim+j] + @inbounds tmp += A[(i - 1) * Ndim + k] * B[(k - 1) * Pdim + j] end - @inbounds C[(i-1)*Ndim+j] = tmp + @inbounds C[(i - 1) * Ndim + j] = tmp end end + return end @info("=== Julia, matix mult (dot prod), order $ORDER ===") @@ -86,16 +89,16 @@ for i in 1:COUNT end # create OpenCL array -d_a = CLArray(h_A; access=:r) -d_b = CLArray(h_B; access=:r) -d_c = CLArray{Float32}(undef, length(h_C); access=:w) +d_a = CLArray(h_A; access = :r) +d_b = CLArray(h_B; access = :r) +d_c = CLArray{Float32}(undef, length(h_C); access = :w) #-------------------------------------------------------------------------------- # OpenCL matrix multiplication ... Naive #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_elem.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") @info("=== OpenCL, matrix mult, C(i, j) per work item, order $Ndim ====") @@ -103,12 +106,14 @@ mmul = cl.Kernel(prg, "mmul") for i in 1:COUNT fill!(h_C, 0.0) cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size=(Ndim, Mdim)) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size = (Ndim, Mdim) + ) wait(evt) # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 + run_time = evt.profile_duration / 1.0e9 cl.copy!(h_C, d_c) results(Mdim, Ndim, Pdim, h_C, run_time) end @@ -119,7 +124,7 @@ end #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_row.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") @info("=== OpenCL, matrix mult, C row per work item, order $Ndim ====") @@ -127,12 +132,14 @@ mmul = cl.Kernel(prg, "mmul") for i in 1:COUNT fill!(h_C, 0.0) cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size=Ndim, local_size=(ORDER ÷ 16)) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c; global_size = Ndim, local_size = (ORDER ÷ 16) + ) wait(evt) # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 + run_time = evt.profile_duration / 1.0e9 cl.copy!(h_C, d_c) results(Mdim, Ndim, Pdim, h_C, run_time) end @@ -142,62 +149,66 @@ end # OpenCL matrix multiplication ... C row per work item, A row in pivate memory #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_row_priv_block.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") wk_size = cl.device().max_work_group_size if Ndim * (ORDER ÷ 16) >= wk_size @warn("Specified work_size is bigger than $wk_size") else -@info("=== OpenCL, matrix mult, C row, priv A, B, cols loc, order $Ndim ====") + @info("=== OpenCL, matrix mult, C row, priv A, B, cols loc, order $Ndim ====") -for i in 1:COUNT - fill!(h_C, 0.0) - localmem = cl.LocalMem(Float32, Pdim) + for i in 1:COUNT + fill!(h_C, 0.0) + localmem = cl.LocalMem(Float32, Pdim) - cl.queue!(:profile) do - global_size = (Ndim,) - local_size = (div(ORDER, 16),) + cl.queue!(:profile) do + global_size = (Ndim,) + local_size = (div(ORDER, 16),) - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, cl.LocalMem{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c, localmem; global_size, local_size) - wait(evt) + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, cl.LocalMem{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c, localmem; global_size, local_size + ) + wait(evt) - # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 - cl.copy!(h_C, d_c) - results(Mdim, Ndim, Pdim, h_C, run_time) + # profiling events are measured in ns + run_time = evt.profile_duration / 1.0e9 + cl.copy!(h_C, d_c) + results(Mdim, Ndim, Pdim, h_C, run_time) + end end end -end #-------------------------------------------------------------------------------- # OpenCL matrix multiplication ... C row per work item, A row pivate, B col local #-------------------------------------------------------------------------------- kernel_source = read(joinpath(src_dir, "C_block_form.cl"), String) -prg = cl.Program(source=kernel_source) |> cl.build! +prg = cl.Program(source = kernel_source) |> cl.build! mmul = cl.Kernel(prg, "mmul") wk_size = cl.device().max_work_group_size if Ndim * (ORDER ÷ 16) >= wk_size @warn("Specified work_size is bigger than $wk_size") else -@info("=== OpenCL, matrix mult, A and B in block form in local memory, order $Ndim ====") -blocksize = 16 - -for i in 1:COUNT - fill!(h_C, 0f0) - localmem1 = cl.LocalMem(Float32, blocksize^2) - localmem2 = cl.LocalMem(Float32, blocksize^2) - cl.queue!(:profile) do - evt = clcall(mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - Mdim, Ndim, Pdim, d_a, d_b, d_c, localmem1, localmem2; global_size=Ndim, local_size=(ORDER ÷ 16)) - wait(evt) - - # profiling events are measured in ns - run_time = evt.profile_duration / 1e9 - cl.copy!(h_C, d_c) - results(Mdim, Ndim, Pdim, h_C, run_time) + @info("=== OpenCL, matrix mult, A and B in block form in local memory, order $Ndim ====") + blocksize = 16 + + for i in 1:COUNT + fill!(h_C, 0.0f0) + localmem1 = cl.LocalMem(Float32, blocksize^2) + localmem2 = cl.LocalMem(Float32, blocksize^2) + cl.queue!(:profile) do + evt = clcall( + mmul, Tuple{Int32, Int32, Int32, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + Mdim, Ndim, Pdim, d_a, d_b, d_c, localmem1, localmem2; global_size = Ndim, local_size = (ORDER ÷ 16) + ) + wait(evt) + + # profiling events are measured in ns + run_time = evt.profile_duration / 1.0e9 + cl.copy!(h_C, d_c) + results(Mdim, Ndim, Pdim, h_C, run_time) + end end end -end diff --git a/examples/hands_on_opencl/ex09/pi_ocl.jl b/examples/hands_on_opencl/ex09/pi_ocl.jl index 4688fba5..1e888f04 100644 --- a/examples/hands_on_opencl/ex09/pi_ocl.jl +++ b/examples/hands_on_opencl/ex09/pi_ocl.jl @@ -18,7 +18,7 @@ src_dir = dirname(Base.source_path()) # # Some constant values -const INSTEPS = 512*512*512 +const INSTEPS = 512 * 512 * 512 const ITERS = 262144 # Set some default values: @@ -29,7 +29,7 @@ const in_nsteps = INSTEPS const niters = ITERS kernelsource = read(joinpath(src_dir, "pi_ocl.cl"), String) -program = cl.Program(source=kernelsource) |> cl.build! +program = cl.Program(source = kernelsource) |> cl.build! # pi is a julia keyword pi_kernel = cl.Kernel(program, "pi") @@ -56,7 +56,7 @@ h_psum = Vector{Float32}(undef, nwork_groups) println("$nwork_groups work groups of size $work_group_size.") println("$nsteps integration steps") -d_partial_sums = CLArray{Float32}(undef, length(h_psum); access=:w) +d_partial_sums = CLArray{Float32}(undef, length(h_psum); access = :w) # start timer rtime = time() @@ -65,11 +65,13 @@ rtime = time() # using the maximum number of work group items for this device # Set the global and local size as tuples global_size = (nwork_groups * work_group_size,) -local_size = (work_group_size,) -localmem = cl.LocalMem(Float32, work_group_size) +local_size = (work_group_size,) +localmem = cl.LocalMem(Float32, work_group_size) -clcall(pi_kernel, Tuple{Int32, Float32, cl.LocalMem{Float32}, Ptr{Float32}}, - niters, step_size, localmem, d_partial_sums; global_size, local_size) +clcall( + pi_kernel, Tuple{Int32, Float32, cl.LocalMem{Float32}, Ptr{Float32}}, + niters, step_size, localmem, d_partial_sums; global_size, local_size +) cl.copy!(h_psum, d_partial_sums) diff --git a/examples/hands_on_opencl/exA/pi_vocl.jl b/examples/hands_on_opencl/exA/pi_vocl.jl index f9cd2d8a..d28d5b11 100644 --- a/examples/hands_on_opencl/exA/pi_vocl.jl +++ b/examples/hands_on_opencl/exA/pi_vocl.jl @@ -29,14 +29,14 @@ end vector_size = parse(Int, ARGS[1]) if vector_size == 1 - ITERS = 262144 - WGS = 8 + ITERS = 262144 + WGS = 8 elseif vector_size == 4 - ITERS = 65536 # (262144/4) - WGS = 32 + ITERS = 65536 # (262144/4) + WGS = 32 elseif vector_size == 8 - ITERS = 32768 # (262144/8) - WGS = 64 + ITERS = 32768 # (262144/8) + WGS = 64 else @warn("Invalid vector size") exit(1) @@ -53,7 +53,7 @@ work_group_size = WGS # Build program kernelsource = read(joinpath(src_dir, "pi_vocl.cl"), String) -program = cl.Program(source=kernelsource) |> cl.build! +program = cl.Program(source = kernelsource) |> cl.build! if vector_size == 1 pi_kernel = cl.Kernel(program, "pi") @@ -95,7 +95,7 @@ h_psum = Vector{Float32}(undef, nwork_groups) println("$nwork_groups work groups of size $work_group_size.") println("$nsteps integration steps") -d_partial_sums = CLArray{Float32}(undef, length(h_psum); access=:w) +d_partial_sums = CLArray{Float32}(undef, length(h_psum); access = :w) # start timer rtime = time() @@ -104,11 +104,13 @@ rtime = time() # using the maximum number of work group items for this device # Set the global and local size as tuples global_size = (nwork_groups * work_group_size,) -local_size = (work_group_size,) -localmem = cl.LocalMem(Float32, work_group_size) +local_size = (work_group_size,) +localmem = cl.LocalMem(Float32, work_group_size) -clcall(pi_kernel, Tuple{Int32, Float32, cl.LocalMem{Float32}, Ptr{Float32}}, - niters, step_size, localmem, d_partial_sums; global_size, local_size) +clcall( + pi_kernel, Tuple{Int32, Float32, cl.LocalMem{Float32}, Ptr{Float32}}, + niters, step_size, localmem, d_partial_sums; global_size, local_size +) cl.copy!(h_psum, d_partial_sums) diff --git a/examples/performance.jl b/examples/performance.jl index a7cb4dbb..87b67d60 100644 --- a/examples/performance.jl +++ b/examples/performance.jl @@ -25,8 +25,8 @@ function cl_performance(ndatapts::Integer, nworkers::Integer) @assert ndatapts > 0 @assert nworkers > 0 - a = rand(Float32, ndatapts) - b = rand(Float32, ndatapts) + a = rand(Float32, ndatapts) + b = rand(Float32, ndatapts) c = Vector{Float32}(undef, ndatapts) @printf("Size of test data: %i MB\n", sizeof(a) / 1024 / 1024) @@ -35,7 +35,7 @@ function cl_performance(ndatapts::Integer, nworkers::Integer) for i in 1:ndatapts c_temp = a[i] + b[i] c_temp = c_temp * c_temp - c[i] = c_temp * (a[i] / 2f0) + c[i] = c_temp * (a[i] / 2.0f0) end t2 = time() @@ -48,19 +48,19 @@ function cl_performance(ndatapts::Integer, nworkers::Integer) cl.device!(device) @printf("====================================================\n") - @printf("Platform name: %s\n", platform.name) - @printf("Platform profile: %s\n", platform.profile) - @printf("Platform vendor: %s\n", platform.vendor) - @printf("Platform version: %s\n", platform.version) + @printf("Platform name: %s\n", platform.name) + @printf("Platform profile: %s\n", platform.profile) + @printf("Platform vendor: %s\n", platform.vendor) + @printf("Platform version: %s\n", platform.version) @printf("----------------------------------------------------\n") @printf("Device name: %s\n", device.name) @printf("Device type: %s\n", device.device_type) - @printf("Device mem: %i MB\n", device.global_mem_size / 1024^2) + @printf("Device mem: %i MB\n", device.global_mem_size / 1024^2) @printf("Device max mem alloc: %i MB\n", device.max_mem_alloc_size / 1024^2) - @printf("Device max clock freq: %i MHZ\n", device.max_clock_frequency) - @printf("Device max compute units: %i\n", device.max_compute_units) + @printf("Device max clock freq: %i MHZ\n", device.max_clock_frequency) + @printf("Device max compute units: %i\n", device.max_compute_units) @printf("Device max work group size: %i\n", device.max_work_group_size) - @printf("Device max work item size: %s\n", device.max_work_item_size) + @printf("Device max work item size: %s\n", device.max_work_item_size) if device.max_mem_alloc_size < sizeof(Float32) * ndatapts @warn("Requested buffer size exceeds device max alloc size!") @@ -74,31 +74,34 @@ function cl_performance(ndatapts::Integer, nworkers::Integer) continue end - da = CLArray(a; access=:r) - db = CLArray(b; access=:r) - dc = CLArray{Float32}(undef, length(a); access=:w) + da = CLArray(a; access = :r) + db = CLArray(b; access = :r) + dc = CLArray{Float32}(undef, length(a); access = :w) - prg = cl.Program(source=bench_kernel) |> cl.build! + prg = cl.Program(source = bench_kernel) |> cl.build! kern = cl.Kernel(prg, "sum") # work_group_multiple = kern.prefered_work_group_size_multiple global_size = (ndatapts,) - local_size = (nworkers,) + local_size = (nworkers,) cl.queue!(:profile) do # call the kernel - evt = clcall(kern, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - da, db, dc; global_size, local_size) + evt = clcall( + kern, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + da, db, dc; global_size, local_size + ) wait(evt) # duration in ns - t = evt.profile_duration * 1e-9 + t = evt.profile_duration * 1.0e-9 @printf("Execution time of test: %.4f seconds\n", t) @info("Result norm: $(norm(c - Array(dc)))") end end end + return end # Play with these numbers to see performance differences @@ -107,5 +110,5 @@ end # ex. N_WORKERS = 1 is non parallel execution on the gpu const N_DATA_PTS = Int(2^23) # ~8 million -const N_WORKERS = Int(2^7) +const N_WORKERS = Int(2^7) cl_performance(N_DATA_PTS, N_WORKERS) diff --git a/examples/vadd.jl b/examples/vadd.jl index d07851e8..c8df8d0f 100644 --- a/examples/vadd.jl +++ b/examples/vadd.jl @@ -1,12 +1,12 @@ using OpenCL, pocl_jll, Test const source = """ - __kernel void vadd(__global const float *a, - __global const float *b, - __global float *c) { - int i = get_global_id(0); - c[i] = a[i] + b[i]; - }""" +__kernel void vadd(__global const float *a, + __global const float *b, + __global float *c) { + int i = get_global_id(0); + c[i] = a[i] + b[i]; + }""" dims = (2,) a = round.(rand(Float32, dims) * 100) @@ -21,7 +21,9 @@ prog = cl.Program(; source) |> cl.build! kern = cl.Kernel(prog, "vadd") len = prod(dims) -clcall(kern, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, - d_a, d_b, d_c; global_size=(len,)) +clcall( + kern, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}}, + d_a, d_b, d_c; global_size = (len,) +) c = Array(d_c) -@test a+b ≈ c +@test a + b ≈ c diff --git a/examples/vadd_native.jl b/examples/vadd_native.jl index 972209b9..6c2958c6 100644 --- a/examples/vadd_native.jl +++ b/examples/vadd_native.jl @@ -16,6 +16,6 @@ d_b = CLArray(b) d_c = CLArray(c) len = prod(dims) -@opencl global_size=len vadd(d_a, d_b, d_c) +@opencl global_size = len vadd(d_a, d_b, d_c) c = Array(d_c) -@test a+b ≈ c +@test a + b ≈ c diff --git a/lib/cl/api.jl b/lib/cl/api.jl index bd09d879..19b53111 100644 --- a/lib/cl/api.jl +++ b/lib/cl/api.jl @@ -66,7 +66,7 @@ function retry_reclaim(f, isfailed) end end - ret + return ret end include("libopencl.jl") @@ -148,7 +148,7 @@ const initialized = Ref{Bool}(false) if is_high_integrity_level() @warn """Running at high integrity level, preventing OpenCL.jl from loading drivers from JLLs. - Only system drivers will be available. To enable JLL drivers, do not run Julia as an administrator.""" + Only system drivers will be available. To enable JLL drivers, do not run Julia as an administrator.""" end end @@ -157,17 +157,18 @@ const initialized = Ref{Bool}(false) ocd_filenames *= ":" * ENV["OCL_ICD_FILENAMES"] end - withenv("OCL_ICD_FILENAMES"=>ocd_filenames) do + return withenv("OCL_ICD_FILENAMES" => ocd_filenames) do num_platforms = Ref{Cuint}() @ccall libopencl.clGetPlatformIDs( 0::cl_uint, C_NULL::Ptr{cl_platform_id}, - num_platforms::Ptr{cl_uint})::cl_int + num_platforms::Ptr{cl_uint} + )::cl_int if num_platforms[] == 0 && isempty(OpenCL_jll.drivers) @error """No OpenCL drivers available, either system-wide or provided by a JLL. - Please install a system-wide OpenCL driver, or load one together with OpenCL.jl, - e.g., by doing `using OpenCL, pocl_jll`.""" + Please install a system-wide OpenCL driver, or load one together with OpenCL.jl, + e.g., by doing `using OpenCL, pocl_jll`.""" end end end @@ -179,13 +180,13 @@ function __init__() # ensure that operations executed by the REPL back-end finish before returning, # because displaying values happens on a different task - if isdefined(Base, :active_repl_backend) && !isnothing(Base.active_repl_backend) + return if isdefined(Base, :active_repl_backend) && !isnothing(Base.active_repl_backend) push!(Base.active_repl_backend.ast_transforms, synchronize_opencl_tasks) end end function synchronize_opencl_tasks(ex) - quote + return quote try $(ex) finally diff --git a/lib/cl/cmdqueue.jl b/lib/cl/cmdqueue.jl index e2b4c07d..4a373db5 100644 --- a/lib/cl/cmdqueue.jl +++ b/lib/cl/cmdqueue.jl @@ -3,14 +3,14 @@ mutable struct CmdQueue <: CLObject const id::cl_command_queue - function CmdQueue(q_id::cl_command_queue; retain::Bool=false) + function CmdQueue(q_id::cl_command_queue; retain::Bool = false) q = new(q_id) retain && clRetainCommandQueue(q) finalizer(q) do _ - # this is to prevent `device_synchronize()` operating on freed queues. - # XXX: why does the WeakKeyDict contain freed objects? - delete!(cl.queues, q) - clReleaseCommandQueue(q) + # this is to prevent `device_synchronize()` operating on freed queues. + # XXX: why does the WeakKeyDict contain freed objects? + delete!(cl.queues, q) + clReleaseCommandQueue(q) end return q end @@ -20,8 +20,8 @@ Base.unsafe_convert(::Type{cl_command_queue}, q::CmdQueue) = q.id function Base.show(io::IO, q::CmdQueue) ptr_val = convert(UInt, pointer(q)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.CmdQueue(@$ptr_address)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.CmdQueue(@$ptr_address)") end function CmdQueue(prop::Symbol) @@ -36,7 +36,7 @@ function CmdQueue(prop::Symbol) return CmdQueue(flags) end -function CmdQueue(props::NTuple{2,Symbol}) +function CmdQueue(props::NTuple{2, Symbol}) if !(:out_of_order in props && :profile in props) throw(ArgumentError("Only :out_of_order and :profile flags are vaid, unrecognized flags $props")) end @@ -44,7 +44,7 @@ function CmdQueue(props::NTuple{2,Symbol}) return CmdQueue(flags) end -function CmdQueue(ctx, dev, flags=cl_command_queue_properties(0)) +function CmdQueue(ctx, dev, flags = cl_command_queue_properties(0)) err_code = Ref{Cint}() queue_id = clCreateCommandQueue(ctx, dev, flags, err_code) if err_code[] != CL_SUCCESS @@ -56,7 +56,7 @@ function CmdQueue(ctx, dev, flags=cl_command_queue_properties(0)) return CmdQueue(queue_id) end -function CmdQueue(flags=cl_command_queue_properties(0)) +function CmdQueue(flags = cl_command_queue_properties(0)) return CmdQueue(context(), device(), flags) end @@ -74,7 +74,7 @@ function Base.getproperty(q::CmdQueue, s::Symbol) if s == :context ctx_id = Ref{cl_context}() clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), ctx_id, C_NULL) - return Context(ctx_id[], retain=true) + return Context(ctx_id[], retain = true) elseif s == :device dev_id = Ref{cl_device_id}() clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(cl_device_id), dev_id, C_NULL) @@ -85,8 +85,10 @@ function Base.getproperty(q::CmdQueue, s::Symbol) return Int(ref_count[]) elseif s == :properties props = Ref{cl_command_queue_properties}() - clGetCommandQueueInfo(q, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), - props, C_NULL) + clGetCommandQueueInfo( + q, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), + props, C_NULL + ) return props[] else return getfield(q, s) @@ -96,7 +98,7 @@ end function global_queue(ctx::Context, dev::Device) # NOTE: dev purposefully does not default to context() or device() to stress that # objects should track ownership, and not rely on implicit global state. - get!(task_local_storage(), (:CLCommandQueue, ctx, dev)) do + return get!(task_local_storage(), (:CLCommandQueue, ctx, dev)) do CmdQueue(ctx, dev) end end diff --git a/lib/cl/context.jl b/lib/cl/context.jl index 74dc02a4..63eea788 100644 --- a/lib/cl/context.jl +++ b/lib/cl/context.jl @@ -6,7 +6,7 @@ mutable struct Context <: CLObject # If created from ctx_id already, we need to increase the reference count # because then we give out multiple context references with multiple finalizers to the world # TODO should we make it in a way, that you can't overwrite it? - function Context(ctx_id::cl_context; retain::Bool=false) + function Context(ctx_id::cl_context; retain::Bool = false) ctx = new(ctx_id) retain && clRetainContext(ctx) finalizer(clReleaseContext, ctx) @@ -20,8 +20,8 @@ function Base.show(io::IO, ctx::Context) dev_strs = [replace(d.name, r"\s+" => " ") for d in ctx.devices] devs_str = join(dev_strs, ",") ptr_val = convert(UInt, pointer(ctx)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.Context(@$ptr_address on $devs_str)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.Context(@$ptr_address on $devs_str)") end struct _CtxErr @@ -33,7 +33,7 @@ end const io_lock = ReentrantLock() function log_error(message...) - @async begin + return @async begin lock(stderr) lock(io_lock) print(stderr, string(message..., "\n")) @@ -51,17 +51,21 @@ function ctx_notify_err( end -ctx_callback_ptr() = @cfunction(ctx_notify_err, Nothing, - (Ptr{Cchar}, Ptr{Nothing}, Csize_t, Ptr{Nothing})) +ctx_callback_ptr() = @cfunction( + ctx_notify_err, Nothing, + (Ptr{Cchar}, Ptr{Nothing}, Csize_t, Ptr{Nothing}) +) function raise_context_error(err_info, private_info, cb) log_error("OpenCL Error: | ", unsafe_string(err_info), " |") return end -function Context(devs::Vector{Device}; - properties=nothing, - callback::Union{Function, Nothing} = nothing) +function Context( + devs::Vector{Device}; + properties = nothing, + callback::Union{Function, Nothing} = nothing + ) if isempty(devs) ArgumentError("No devices specified for context") end @@ -82,7 +86,8 @@ function Context(devs::Vector{Device}; f_ptr = @cfunction($payload, Nothing, (Ptr{Cchar}, Ptr{Nothing}, Csize_t)) ctx_id = clCreateContext( ctx_properties, n_devices, device_ids, - ctx_callback_ptr(), f_ptr, err_code) + ctx_callback_ptr(), f_ptr, err_code + ) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) end @@ -90,8 +95,8 @@ function Context(devs::Vector{Device}; end -Context(d::Device; properties=nothing, callback=nothing) = - Context([d], properties=properties, callback=callback) +Context(d::Device; properties = nothing, callback = nothing) = + Context([d], properties = properties, callback = callback) function Context(dev_type; properties = nothing, callback = nothing) if properties !== nothing @@ -106,18 +111,24 @@ function Context(dev_type; properties = nothing, callback = nothing) end err_code = Ref{Cint}() ctx_user_data = @cfunction($ctx_user_data_cb, Nothing, (Ptr{Cchar}, Ptr{Nothing}, Csize_t)) - ctx_id = clCreateContextFromType(ctx_properties, dev_type, - ctx_callback_ptr(), ctx_user_data, err_code) + ctx_id = clCreateContextFromType( + ctx_properties, dev_type, + ctx_callback_ptr(), ctx_user_data, err_code + ) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) end return Context(ctx_id) end -function Context(dev_type::Symbol; - properties=nothing, callback=nothing) - Context(cl_device_type(dev_type), - properties=properties, callback=callback) +function Context( + dev_type::Symbol; + properties = nothing, callback = nothing + ) + return Context( + cl_device_type(dev_type), + properties = properties, callback = callback + ) end function Base.getproperty(ctx::Context, s::Symbol) @@ -149,15 +160,15 @@ function Base.getproperty(ctx::Context, s::Symbol) result = Any[] for i in 1:2:nprops key = props[i] - value = i < nprops ? props[i+1] : nothing + value = i < nprops ? props[i + 1] : nothing if key == CL_CONTEXT_PLATFORM push!(result, (key, Platform(cl_platform_id(value)))) elseif key == CL_GL_CONTEXT_KHR || - key == CL_EGL_DISPLAY_KHR || - key == CL_GLX_DISPLAY_KHR || - key == CL_WGL_HDC_KHR || - key == CL_CGL_SHAREGROUP_KHR + key == CL_EGL_DISPLAY_KHR || + key == CL_GLX_DISPLAY_KHR || + key == CL_WGL_HDC_KHR || + key == CL_CGL_SHAREGROUP_KHR push!(result, (key, value)) elseif key == 0 if i != nprops @@ -198,9 +209,9 @@ function _parse_properties(props) elseif Sys.isapple() ? (prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE) : false push!(cl_props, cl_context_properties(val)) elseif prop == CL_GL_CONTEXT_KHR || - prop == CL_EGL_DISPLAY_KHR || - prop == CL_GLX_DISPLAY_KHR || - prop == CL_CGL_SHAREGROUP_KHR + prop == CL_EGL_DISPLAY_KHR || + prop == CL_GLX_DISPLAY_KHR || + prop == CL_CGL_SHAREGROUP_KHR push!(cl_props, cl_context_properties(val)) else throw(OpenCLException("Invalid OpenCL Context property")) diff --git a/lib/cl/device.jl b/lib/cl/device.jl index 8c6d87d9..f6e8d76f 100644 --- a/lib/cl/device.jl +++ b/lib/cl/device.jl @@ -11,8 +11,8 @@ function Base.show(io::IO, d::Device) device_name = replace(d.name, strip_extra_whitespace => " ") platform_name = replace(d.platform.name, strip_extra_whitespace => " ") ptr_val = convert(UInt, pointer(d)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.Device($device_name on $platform_name @$ptr_address)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.Device($device_name on $platform_name @$ptr_address)") end @inline function Base.getproperty(d::Device, s::Symbol) @@ -140,17 +140,17 @@ end end if s == :max_image2d_shape - width = Ref{Csize_t}() + width = Ref{Csize_t}() height = Ref{Csize_t}() - clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL) + clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL) clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL) return (width[], height[]) end if s == :max_image3d_shape - width = Ref{Csize_t}() + width = Ref{Csize_t}() height = Ref{Csize_t}() - depth = Ref{Csize_t}() + depth = Ref{Csize_t}() clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL) clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL) clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(Csize_t), depth, C_NULL) @@ -160,14 +160,18 @@ end return getfield(d, s) end -function queue_properties(d::Device, type=:host) +function queue_properties(d::Device, type = :host) result = Ref{cl_command_queue_properties}() if type === :host - clGetDeviceInfo(d, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - sizeof(cl_command_queue_properties), result, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + sizeof(cl_command_queue_properties), result, C_NULL + ) elseif type === :device - clGetDeviceInfo(d, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - sizeof(cl_command_queue_properties), result, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + sizeof(cl_command_queue_properties), result, C_NULL + ) else throw(ArgumentError("Unknown queue type: $type")) end @@ -175,14 +179,16 @@ function queue_properties(d::Device, type=:host) return (; out_of_order_exec = mask & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE != 0, - profiling = mask & CL_QUEUE_PROFILING_ENABLE != 0 + profiling = mask & CL_QUEUE_PROFILING_ENABLE != 0, ) end function exec_capabilities(d::Device) result = Ref{cl_device_exec_capabilities}() - clGetDeviceInfo(d, CL_DEVICE_EXECUTION_CAPABILITIES, - sizeof(cl_device_exec_capabilities), result, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(cl_device_exec_capabilities), result, C_NULL + ) mask = result[] return (; @@ -192,33 +198,43 @@ end function usm_capabilities(d::Device) result1 = Ref{cl_device_unified_shared_memory_capabilities_intel}() - clGetDeviceInfo(d, CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, - sizeof(cl_device_unified_shared_memory_capabilities_intel), result1, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, + sizeof(cl_device_unified_shared_memory_capabilities_intel), result1, C_NULL + ) result2 = Ref{cl_device_unified_shared_memory_capabilities_intel}() - clGetDeviceInfo(d, CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, - sizeof(cl_device_unified_shared_memory_capabilities_intel), result2, C_NULL) - + clGetDeviceInfo( + d, CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + sizeof(cl_device_unified_shared_memory_capabilities_intel), result2, C_NULL + ) + result3 = Ref{cl_device_unified_shared_memory_capabilities_intel}() - clGetDeviceInfo(d, CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, - sizeof(cl_device_unified_shared_memory_capabilities_intel), result3, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + sizeof(cl_device_unified_shared_memory_capabilities_intel), result3, C_NULL + ) result4 = Ref{cl_device_unified_shared_memory_capabilities_intel}() - clGetDeviceInfo(d, CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, - sizeof(cl_device_unified_shared_memory_capabilities_intel), result4, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, + sizeof(cl_device_unified_shared_memory_capabilities_intel), result4, C_NULL + ) result5 = Ref{cl_device_unified_shared_memory_capabilities_intel}() - clGetDeviceInfo(d, CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, - sizeof(cl_device_unified_shared_memory_capabilities_intel), result5, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + sizeof(cl_device_unified_shared_memory_capabilities_intel), result5, C_NULL + ) mask = (result1[], result2[], result3[], result4[], result5[]) - + function retmask(m) - return (; + return (; usm_access = m & CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL != 0, usm_atomic_access = m & CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL != 0, - usm_concurrent_access = m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL !=0, - usm_concurrent_atomic_acces = m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL !=0, + usm_concurrent_access = m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL != 0, + usm_concurrent_atomic_acces = m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL != 0, ) end @@ -233,8 +249,10 @@ end function svm_capabilities(d::Device) result = Ref{cl_device_svm_capabilities}() - clGetDeviceInfo(d, CL_DEVICE_SVM_CAPABILITIES, - sizeof(cl_device_svm_capabilities), result, C_NULL) + clGetDeviceInfo( + d, CL_DEVICE_SVM_CAPABILITIES, + sizeof(cl_device_svm_capabilities), result, C_NULL + ) mask = result[] return (; diff --git a/lib/cl/error.jl b/lib/cl/error.jl index 2fb2e242..07bc19bb 100644 --- a/lib/cl/error.jl +++ b/lib/cl/error.jl @@ -1,14 +1,14 @@ const _cl_error_codes = Dict{Int, Symbol}( - +0 => :CL_SUCCESS, - -1 => :CL_DEVICE_NOT_FOUND, - -2 => :CL_DEVICE_NOT_AVAILABLE, - -3 => :CL_COMPILER_NOT_AVAILABLE, - -4 => :CL_MEM_OBJECT_ALLOCATION_FAILURE, - -5 => :CL_OUT_OF_RESOURCES, - -6 => :CL_OUT_OF_HOST_MEMORY, - -7 => :CL_PROFILING_INFO_NOT_AVAILABLE, - -8 => :CL_MEM_COPY_OVERLAP, - -9 => :CL_IMAGE_FORMAT_MISMATCH, + +0 => :CL_SUCCESS, + -1 => :CL_DEVICE_NOT_FOUND, + -2 => :CL_DEVICE_NOT_AVAILABLE, + -3 => :CL_COMPILER_NOT_AVAILABLE, + -4 => :CL_MEM_OBJECT_ALLOCATION_FAILURE, + -5 => :CL_OUT_OF_RESOURCES, + -6 => :CL_OUT_OF_HOST_MEMORY, + -7 => :CL_PROFILING_INFO_NOT_AVAILABLE, + -8 => :CL_MEM_COPY_OVERLAP, + -9 => :CL_IMAGE_FORMAT_MISMATCH, -10 => :CL_IMAGE_FORMAT_NOT_SUPPORTED, -11 => :CL_BUILD_PROGRAM_FAILURE, -12 => :CL_MAP_FAILURE, @@ -87,80 +87,92 @@ const _cl_error_codes = Dict{Int, Symbol}( const _cl_err_desc = Dict{Integer, String}( CL_INVALID_CONTEXT => - "Context is not a valid context.", + "Context is not a valid context.", CL_INVALID_BUFFER_SIZE => - "Buffer size is 0", + "Buffer size is 0", CL_INVALID_EVENT => - "Event objects specified in event_list are not valid event objects", + "Event objects specified in event_list are not valid event objects", CL_INVALID_HOST_PTR => - string("If host_ptr is NULL and CL_MEM_USE_HOST_PTR or ", - "CL_MEM_COPY_HOST_PTR are set in flags or if host_ptr is not NULL but ", - "CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in flags."), + string( + "If host_ptr is NULL and CL_MEM_USE_HOST_PTR or ", + "CL_MEM_COPY_HOST_PTR are set in flags or if host_ptr is not NULL but ", + "CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in flags." + ), CL_MEM_OBJECT_ALLOCATION_FAILURE => - "Failure to allocate memory for buffer object.", + "Failure to allocate memory for buffer object.", CL_OUT_OF_RESOURCES => - "Failure to allocate resources required by the OpenCL implementation on the device.", + "Failure to allocate resources required by the OpenCL implementation on the device.", CL_OUT_OF_HOST_MEMORY => - "Failure to allocate resources required by the OpenCL implementation on the host", + "Failure to allocate resources required by the OpenCL implementation on the host", CL_INVALID_PROGRAM => - "Program is not a valid program object.", + "Program is not a valid program object.", CL_INVALID_VALUE => - "CL_INVALID_VALUE: this one should have been caught by julia!", + "CL_INVALID_VALUE: this one should have been caught by julia!", CL_INVALID_DEVICE => - "OpenCL devices listed in device_list are not in the list of devices associated with program.", + "OpenCL devices listed in device_list are not in the list of devices associated with program.", CL_INVALID_BINARY => - string("program is created with clCreateWithProgramBinary and devices listed in ", - "device_list do not have a valid program binary loaded."), + string( + "program is created with clCreateWithProgramBinary and devices listed in ", + "device_list do not have a valid program binary loaded." + ), CL_INVALID_BUILD_OPTIONS => - "The build options specified by options are invalid.", + "The build options specified by options are invalid.", CL_INVALID_OPERATION => - string("The build of a program executable for any of the devices listed in device_list by a ", - "previous call to clBuildProgram for program has not completed."), + string( + "The build of a program executable for any of the devices listed in device_list by a ", + "previous call to clBuildProgram for program has not completed." + ), CL_COMPILER_NOT_AVAILABLE => - "Program is created with clCreateProgramWithSource and a compiler is not available", + "Program is created with clCreateProgramWithSource and a compiler is not available", CL_BUILD_PROGRAM_FAILURE => - string("Failure to build the program executable. ", - "This error will be returned if clBuildProgram ", - "does not return until the build has completed"), + string( + "Failure to build the program executable. ", + "This error will be returned if clBuildProgram ", + "does not return until the build has completed" + ), CL_INVALID_OPERATION => - "There are kernel objects attached to program.", + "There are kernel objects attached to program.", CL_OUT_OF_HOST_MEMORY => - "if there is a failure to allocate resources required by the OpenCL implementation on the host.", + "if there is a failure to allocate resources required by the OpenCL implementation on the host.", CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST => - "The execution status of any of the events in event_list is a negative integer value", + "The execution status of any of the events in event_list is a negative integer value", CL_INVALID_PROGRAM_EXECUTABLE => - "there is no successfully built executable for program", + "there is no successfully built executable for program", CL_INVALID_KERNEL_NAME => - "kernel_name is not found in program.", + "kernel_name is not found in program.", CL_INVALID_KERNEL_DEFINITION => - string("The function definition for __kernel function ", - "given by kernel_name such as the number of arguments, the argument types are not the ", - "same for all devices for which the program executable has been built"), + string( + "The function definition for __kernel function ", + "given by kernel_name such as the number of arguments, the argument types are not the ", + "same for all devices for which the program executable has been built" + ), CL_PROFILING_INFO_NOT_AVAILABLE => - string("The CL_QUEUE_PROFILING_ENABLE flag ", - "is not set for the command-queue, if the execution status of the command identified by ", - "event is not CL_COMPLETE or if event is a user event objec"), + string( + "The CL_QUEUE_PROFILING_ENABLE flag ", + "is not set for the command-queue, if the execution status of the command identified by ", + "event is not CL_COMPLETE or if event is a user event objec" + ), ) struct CLMemoryError <: Exception @@ -180,13 +192,13 @@ struct CLError <: Exception desc::Symbol function CLError(c::Integer) - new(c, get(_cl_error_codes, Int(c), :CL_UNKNOWN_ERROR_CODE)) + return new(c, get(_cl_error_codes, Int(c), :CL_UNKNOWN_ERROR_CODE)) end end Base.show(io::IO, err::CLError) = - Base.print(io, "CLError(code=$(err.code), $(err.desc))") + Base.print(io, "CLError(code=$(err.code), $(err.desc))") function error_description(err::CLError) - get(_cl_err_desc, err.code, "no description for error $(err.code)") + return get(_cl_err_desc, err.code, "no description for error $(err.code)") end diff --git a/lib/cl/event.jl b/lib/cl/event.jl index 42a914cf..7581cfde 100644 --- a/lib/cl/event.jl +++ b/lib/cl/event.jl @@ -5,7 +5,7 @@ abstract type AbstractEvent <: CLObject end mutable struct Event <: AbstractEvent const id::cl_event - function Event(evt_id; retain::Bool=false) + function Event(evt_id; retain::Bool = false) evt = new(evt_id) retain && clRetainEvent(evt) finalizer(clReleaseEvent, evt) @@ -18,21 +18,21 @@ mutable struct NannyEvent <: AbstractEvent const id::cl_event const obj::Any - function NannyEvent(evt_id, obj; retain::Bool=false) + function NannyEvent(evt_id, obj; retain::Bool = false) nanny_evt = new(evt_id, obj) retain && clRetainEvent(nanny_evt) finalizer(clReleaseEvent, nanny_evt) - nanny_evt + return nanny_evt end end -NannyEvent(evt::Event, obj; retain=false) = NannyEvent(evt.id, obj; retain) +NannyEvent(evt::Event, obj; retain = false) = NannyEvent(evt.id, obj; retain) macro return_event(evt) - quote + return quote evt = $(esc(evt)) try - return Event(evt, retain=false) + return Event(evt, retain = false) catch err clReleaseEvent(evt) throw(err) @@ -41,7 +41,7 @@ macro return_event(evt) end macro return_nanny_event(evt, obj) - quote + return quote evt = $(esc(evt)) try return NannyEvent(evt, $(esc(obj))) @@ -56,14 +56,14 @@ Base.unsafe_convert(::Type{cl_event}, evt::AbstractEvent) = evt.id function Base.show(io::IO, evt::Event) ptr_val = convert(UInt, pointer(evt)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.Event(@$ptr_address)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.Event(@$ptr_address)") end mutable struct UserEvent <: AbstractEvent const id::cl_event - function UserEvent(evt_id::cl_event, retain::Bool=false) + function UserEvent(evt_id::cl_event, retain::Bool = false) evt = new(evt_id) retain && clRetainEvent(evt) finalizer(clReleaseEvent, evt) @@ -71,7 +71,7 @@ mutable struct UserEvent <: AbstractEvent end end -function UserEvent(; retain=false) +function UserEvent(; retain = false) status = Ref{Cint}() evt_id = clCreateUserEvent(context(), status) if status[] != CL_SUCCESS @@ -87,8 +87,8 @@ end function Base.show(io::IO, evt::UserEvent) ptr_val = convert(UInt, pointer(evt)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.UserEvent(@$ptr_address)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.UserEvent(@$ptr_address)") end function complete(evt::UserEvent) @@ -111,17 +111,19 @@ function event_notify(evt_id::cl_event, status::Cint, payload::Ptr{Nothing}) # Use uv_async_send to notify the main thread ccall(:uv_async_send, Nothing, (Ptr{Nothing},), handle) - nothing + return nothing end function add_callback(evt::AbstractEvent, callback::Function) - event_notify_ptr = @cfunction(event_notify, Nothing, - (cl_event, Cint, Ptr{Cvoid})) + event_notify_ptr = @cfunction( + event_notify, Nothing, + (cl_event, Cint, Ptr{Cvoid}) + ) # The uv_callback is going to notify a task that, # then executes the real callback. cb = Base.AsyncCondition() - GC.@preserve cb begin + return GC.@preserve cb begin # Storing the results of our c_callback needs to be # isbits && isimmutable @@ -130,15 +132,15 @@ function add_callback(evt::AbstractEvent, callback::Function) clSetEventCallback(evt, CL_COMPLETE, event_notify_ptr, r_ecb) @async begin - try - Base.wait(cb) - ecb = r_ecb[] - callback(ecb.evt_id, ecb.status) - catch - rethrow() - finally - Base.close(cb) - end + try + Base.wait(cb) + ecb = r_ecb[] + callback(ecb.evt_id, ecb.status) + catch + rethrow() + finally + Base.close(cb) + end end end end @@ -162,38 +164,42 @@ function enqueue_marker_with_wait_list(wait_for::Vector{AbstractEvent}) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] ret_evt = Ref{cl_event}() - clEnqueueMarkerWithWaitList(queue(), n_wait_events, - isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, - ret_evt) - @return_event ret_evt[] + clEnqueueMarkerWithWaitList( + queue(), n_wait_events, + isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, + ret_evt + ) + return @return_event ret_evt[] end function enqueue_barrier_with_wait_list(wait_for::Vector{AbstractEvent}) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] ret_evt = Ref{cl_event}() - clEnqueueBarrierWithWaitList(queue(), n_wait_events, - isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, - ret_evt) - @return_event ret_evt[] + clEnqueueBarrierWithWaitList( + queue(), n_wait_events, + isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, + ret_evt + ) + return @return_event ret_evt[] end function enqueue_marker() evt = Ref{cl_event}() clEnqueueMarker(queue(), evt) - @return_event evt[] + return @return_event evt[] end @deprecate enqueue_marker enqueue_marker_with_wait_list -function enqueue_wait_for_events(wait_for::Vector{T}) where {T<:AbstractEvent} +function enqueue_wait_for_events(wait_for::Vector{T}) where {T <: AbstractEvent} wait_evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] - GC.@preserve wait_for begin + return GC.@preserve wait_for begin clEnqueueWaitForEvents(queue(), length(wait_for), wait_evt_ids) - end + end end function enqueue_wait_for_events(wait_for::AbstractEvent) - enqueue_wait_for_events([wait_for]) + return enqueue_wait_for_events([wait_for]) end function enqueue_barrier() @@ -267,7 +273,7 @@ function Base.getproperty(evt::AbstractEvent, s::Symbol) throw(ArgumentError("Unknown status value: $status")) end - # profiling properties + # profiling properties elseif s == :profile_start return profiling_info(evt, CL_PROFILING_COMMAND_START) elseif s == :profile_end diff --git a/lib/cl/intelfns.jl b/lib/cl/intelfns.jl index 0c3be81f..dc8dec8f 100644 --- a/lib/cl/intelfns.jl +++ b/lib/cl/intelfns.jl @@ -1,55 +1,55 @@ ocl_extension(s) = cl.clGetExtensionFunctionAddressForPlatform(cl.platform(), s) function ext_clHostMemAllocINTEL(context, properties, size, alignment, errcode_ret) - ocl_intel = ocl_extension("clHostMemAllocINTEL") + ocl_intel = ocl_extension("clHostMemAllocINTEL") - ccall(ocl_intel, Ptr{Cvoid}, (cl.cl_context, Ptr{cl.cl_mem_properties_intel}, Csize_t, cl.cl_uint, Ptr{cl.cl_int}), context, properties, size, alignment, errcode_ret) + return ccall(ocl_intel, Ptr{Cvoid}, (cl.cl_context, Ptr{cl.cl_mem_properties_intel}, Csize_t, cl.cl_uint, Ptr{cl.cl_int}), context, properties, size, alignment, errcode_ret) end function ext_clDeviceMemAllocINTEL(context, device, properties, size, alignment, errcode_ret) - ocl_intel = ocl_extension("clDeviceMemAllocINTEL") - - @ccall $ocl_intel(context::cl.cl_context, device::cl.cl_device_id, properties::Ptr{cl.cl_mem_properties_intel}, size::Csize_t, alignment::cl.cl_uint, errcode_ret::Ptr{cl.cl_int})::Ptr{Cvoid} + ocl_intel = ocl_extension("clDeviceMemAllocINTEL") + + return @ccall $ocl_intel(context::cl.cl_context, device::cl.cl_device_id, properties::Ptr{cl.cl_mem_properties_intel}, size::Csize_t, alignment::cl.cl_uint, errcode_ret::Ptr{cl.cl_int})::Ptr{Cvoid} end function ext_clSharedMemAllocINTEL(context, device, properties, size, alignment, errcode_ret) - ocl_intel = ocl_extension("clSharedMemAllocINTEL") - - @ccall $ocl_intel(context::cl.cl_context, device::cl.cl_device_id, properties::Ptr{cl.cl_mem_properties_intel}, size::Csize_t, alignment::cl.cl_uint, errcode_ret::Ptr{cl.cl_int})::Ptr{Cvoid} + ocl_intel = ocl_extension("clSharedMemAllocINTEL") + + return @ccall $ocl_intel(context::cl.cl_context, device::cl.cl_device_id, properties::Ptr{cl.cl_mem_properties_intel}, size::Csize_t, alignment::cl.cl_uint, errcode_ret::Ptr{cl.cl_int})::Ptr{Cvoid} end function ext_clMemFreeINTEL(context, ptr) - ocl_intel = ocl_extension("clMemFreeINTEL") - - @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid})::cl.cl_int + ocl_intel = ocl_extension("clMemFreeINTEL") + + return @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid})::cl.cl_int end function ext_clMemBlockingFreeINTEL(context, ptr) - ocl_intel = ocl_extension("clMemBlockingFreeINTEL") - - @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid})::cl.cl_int + ocl_intel = ocl_extension("clMemBlockingFreeINTEL") + + return @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid})::cl.cl_int end function ext_clGetMemAllocInfoINTEL(context, ptr, param_name, param_value_size, param_value, param_value_size_ret) - ocl_intel = ocl_extension("clGetMemAllocInfoINTEL") - - @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid}, param_name::cl.cl_mem_info_intel, param_value_size::Csize_t, param_value::Ptr{Cvoid}, param_value_size_ret::Ptr{Csize_t})::cl.cl_int + ocl_intel = ocl_extension("clGetMemAllocInfoINTEL") + + return @ccall $ocl_intel(context::cl.cl_context, ptr::PtrOrCLPtr{Cvoid}, param_name::cl.cl_mem_info_intel, param_value_size::Csize_t, param_value::Ptr{Cvoid}, param_value_size_ret::Ptr{Csize_t})::cl.cl_int end function ext_clEnqueueMemcpyINTEL(command_queue, blocking, dst_ptr, src_ptr, size, num_events_in_wait_list, event_wait_list, event) ocl_intel = ocl_extension("clEnqueueMemcpyINTEL") - @ccall $ocl_intel(command_queue::cl_command_queue, blocking::cl_bool, dst_ptr::PtrOrCLPtr{Cvoid}, src_ptr::PtrOrCLPtr{Cvoid}, size::Csize_t, num_events_in_wait_list::cl_uint, event_wait_list::Ptr{cl_event}, event::Ptr{cl_event})::cl_int + return @ccall $ocl_intel(command_queue::cl_command_queue, blocking::cl_bool, dst_ptr::PtrOrCLPtr{Cvoid}, src_ptr::PtrOrCLPtr{Cvoid}, size::Csize_t, num_events_in_wait_list::cl_uint, event_wait_list::Ptr{cl_event}, event::Ptr{cl_event})::cl_int end function ext_clEnqueueMemFillINTEL(command_queue, dst_ptr, pattern, pattern_size, size, num_events_in_wait_list, event_wait_list, event) ocl_intel = ocl_extension("clEnqueueMemFillINTEL") - @ccall $ocl_intel(command_queue::cl_command_queue, dst_ptr::PtrOrCLPtr{Cvoid}, pattern::Ptr{Cvoid}, pattern_size::Csize_t, size::Csize_t, num_events_in_wait_list::cl_uint, event_wait_list::Ptr{cl_event}, event::Ptr{cl_event})::cl_int + return @ccall $ocl_intel(command_queue::cl_command_queue, dst_ptr::PtrOrCLPtr{Cvoid}, pattern::Ptr{Cvoid}, pattern_size::Csize_t, size::Csize_t, num_events_in_wait_list::cl_uint, event_wait_list::Ptr{cl_event}, event::Ptr{cl_event})::cl_int end function ext_clSetKernelArgMemPointerINTEL(kernel, arg_index, arg_value) - ocl_intel = ocl_extension("clSetKernelArgMemPointerINTEL") + ocl_intel = ocl_extension("clSetKernelArgMemPointerINTEL") - @ccall $ocl_intel(kernel::cl_kernel, arg_index::cl_uint, arg_value::Ptr{Cvoid})::cl_int + return @ccall $ocl_intel(kernel::cl_kernel, arg_index::cl_uint, arg_value::Ptr{Cvoid})::cl_int end diff --git a/lib/cl/kernel.jl b/lib/cl/kernel.jl index c2b3facd..e8bfcd7b 100644 --- a/lib/cl/kernel.jl +++ b/lib/cl/kernel.jl @@ -5,7 +5,7 @@ export clcall mutable struct Kernel <: CLObject const id::cl_kernel - function Kernel(k::cl_kernel, retain::Bool=false) + function Kernel(k::cl_kernel, retain::Bool = false) kernel = new(k) retain && clRetainKernel(kernel) finalizer(clReleaseKernel, kernel) @@ -38,7 +38,7 @@ struct LocalMem{T} nbytes::Csize_t end -function LocalMem(::Type{T}, len::Integer) where T +function LocalMem(::Type{T}, len::Integer) where {T} @assert len > 0 nbytes = sizeof(T) * len return LocalMem{T}(convert(Csize_t, nbytes)) @@ -55,18 +55,18 @@ Base.unsafe_convert(::Type{Ptr{T}}, l::LocalMem{T}) where {T} = l function set_arg!(k::Kernel, idx::Integer, arg::Nothing) @assert idx > 0 - clSetKernelArg(k, cl_uint(idx-1), sizeof(cl_mem), C_NULL) + clSetKernelArg(k, cl_uint(idx - 1), sizeof(cl_mem), C_NULL) return k end # SVMBuffers ## when passing using `cl.call` function set_arg!(k::Kernel, idx::Integer, arg::Union{HostBuffer, DeviceBuffer, SharedBuffer}) - ext_clSetKernelArgMemPointerINTEL(k, cl_uint(idx-1), arg.ptr) + ext_clSetKernelArgMemPointerINTEL(k, cl_uint(idx - 1), arg.ptr) return k end ## when passing with `clcall`, which has pre-converted the buffer -function set_arg!(k::Kernel, idx::Integer, arg::Union{Ptr,Core.LLVMPtr}) +function set_arg!(k::Kernel, idx::Integer, arg::Union{Ptr, Core.LLVMPtr}) arg = reinterpret(Ptr{Cvoid}, arg) if arg != C_NULL # XXX: this assumes that the receiving argument is pointer-typed, which is not the @@ -74,7 +74,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::Union{Ptr,Core.LLVMPtr}) # `Core.LLVMPtr`, which _is_ pointer-valued. We retain this handling for `Ptr` # for users passing pointers to OpenCL C, and because `Ptr` is pointer-valued # starting with Julia 1.12. - ext_clSetKernelArgMemPointerINTEL(k, cl_uint(idx-1), arg) + ext_clSetKernelArgMemPointerINTEL(k, cl_uint(idx - 1), arg) end return k end @@ -82,29 +82,31 @@ end # regular buffers function set_arg!(k::Kernel, idx::Integer, arg::AbstractBuffer) arg_boxed = Ref(arg.id) - clSetKernelArg(k, cl_uint(idx-1), sizeof(cl_mem), arg_boxed) + clSetKernelArg(k, cl_uint(idx - 1), sizeof(cl_mem), arg_boxed) return k end function set_arg!(k::Kernel, idx::Integer, arg::LocalMem) - clSetKernelArg(k, cl_uint(idx-1), arg.nbytes, C_NULL) + clSetKernelArg(k, cl_uint(idx - 1), arg.nbytes, C_NULL) return k end -function set_arg!(k::Kernel, idx::Integer, arg::T) where T +function set_arg!(k::Kernel, idx::Integer, arg::T) where {T} ref = Ref(arg) tsize = sizeof(ref) err = unchecked_clSetKernelArg(k, cl_uint(idx - 1), tsize, ref) if err == CL_INVALID_ARG_SIZE - error("""Mismatch between Julia and OpenCL type for kernel argument $idx. - - Possible reasons: - - OpenCL does not support empty types. - - Vectors of length 3 (e.g., `float3`) are packed as 4-element vectors; - consider padding your tuples. - - The alignment of fields in your struct may not match the OpenCL layout. - Make sure your Julia definition matches the OpenCL layout, e.g., by - using `__attribute__((packed))` in your OpenCL struct definition.""") + error( + """Mismatch between Julia and OpenCL type for kernel argument $idx. + + Possible reasons: + - OpenCL does not support empty types. + - Vectors of length 3 (e.g., `float3`) are packed as 4-element vectors; + consider padding your tuples. + - The alignment of fields in your struct may not match the OpenCL layout. + Make sure your Julia definition matches the OpenCL layout, e.g., by + using `__attribute__((packed))` in your OpenCL struct definition.""" + ) elseif err != CL_SUCCESS throw(CLError(err)) end @@ -115,12 +117,15 @@ function set_args!(k::Kernel, args...) for (i, a) in enumerate(args) set_arg!(k, i, a) end + return end -function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing; - global_work_offset=nothing, wait_on::Vector{Event}=Event[]) +function enqueue_kernel( + k::Kernel, global_work_size, local_work_size = nothing; + global_work_offset = nothing, wait_on::Vector{Event} = Event[] + ) max_work_dim = device().max_work_item_dims - work_dim = length(global_work_size) + work_dim = length(global_work_size) if work_dim > max_work_dim throw(ArgumentError("global_work_size has max dim of $max_work_dim")) end @@ -170,20 +175,26 @@ function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing; end ret_event = Ref{cl_event}() - clEnqueueNDRangeKernel(queue(), k, cl_uint(work_dim), goffset, gsize, lsize, - n_events, wait_event_ids, ret_event) - return Event(ret_event[], retain=false) + clEnqueueNDRangeKernel( + queue(), k, cl_uint(work_dim), goffset, gsize, lsize, + n_events, wait_event_ids, ret_event + ) + return Event(ret_event[], retain = false) end -function call(k::Kernel, args...; global_size=(1,), local_size=nothing, - global_work_offset=nothing, wait_on::Vector{Event}=Event[], - pointers::Vector{CLPtr}=CLPtr[]) +function call( + k::Kernel, args...; global_size = (1,), local_size = nothing, + global_work_offset = nothing, wait_on::Vector{Event} = Event[], + pointers::Vector{CLPtr} = CLPtr[] + ) set_args!(k, args...) if !isempty(pointers) - clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, - sizeof(pointers), pointers) + clSetKernelExecInfo( + k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, + sizeof(pointers), pointers + ) end - enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on) + return enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on) end # convert the argument values to match the kernel's signature (specified by the user) @@ -202,23 +213,27 @@ end push!(ex.args, :($(arg_ptrs[i]) = Base.unsafe_convert($(types[i]), $(converted_args[i])))) end - append!(ex.args, (quote - GC.@preserve $(converted_args...) begin - f($(arg_ptrs...)) - end - end).args) + append!( + ex.args, ( + quote + GC.@preserve $(converted_args...) begin + f($(arg_ptrs...)) + end + end + ).args + ) return ex end -clcall(f::F, types::Tuple, args::Vararg{Any,N}; kwargs...) where {N,F} = +clcall(f::F, types::Tuple, args::Vararg{Any, N}; kwargs...) where {N, F} = clcall(f, _to_tuple_type(types), args...; kwargs...) -function clcall(k::Kernel, types::Type{T}, args::Vararg{Any,N}; kwargs...) where {T,N} - call_closure = function (converted_args::Vararg{Any,N}) - call(k, converted_args...; kwargs...) +function clcall(k::Kernel, types::Type{T}, args::Vararg{Any, N}; kwargs...) where {T, N} + call_closure = function (converted_args::Vararg{Any, N}) + return call(k, converted_args...; kwargs...) end - convert_arguments(call_closure, types, args...) + return convert_arguments(call_closure, types, args...) end # From `julia/base/reflection.jl`, adjusted to add specialization on `t`. @@ -238,11 +253,11 @@ function _to_tuple_type(t) else error("expected tuple type") end - t + return t end -function enqueue_task(k::Kernel; wait_for=nothing) - n_evts = 0 +function enqueue_task(k::Kernel; wait_for = nothing) + n_evts = 0 evt_ids = C_NULL #TODO: this should be split out into its own function if wait_for !== nothing @@ -278,11 +293,11 @@ function Base.getproperty(k::Kernel, s::Symbol) elseif s == :context result = Ref{cl_context}() clGetKernelInfo(k, CL_KERNEL_CONTEXT, sizeof(cl_context), result, C_NULL) - return Context(result[], retain=true) + return Context(result[], retain = true) elseif s == :program result = Ref{cl_program}() clGetKernelInfo(k, CL_KERNEL_PROGRAM, sizeof(cl_program), result, C_NULL) - return Program(result[], retain=true) + return Program(result[], retain = true) elseif s == :attributes size = Ref{Csize_t}() err = unchecked_clGetKernelInfo(k, CL_KERNEL_ATTRIBUTES, 0, C_NULL, size) @@ -314,7 +329,7 @@ function Base.getproperty(ki::KernelWorkGroupInfo, s::Symbol) return result[] end - if s == :size + return if s == :size Int(get(CL_KERNEL_WORK_GROUP_SIZE, Csize_t)) elseif s == :compile_size Int.(get(CL_KERNEL_COMPILE_WORK_GROUP_SIZE, NTuple{3, Csize_t})) diff --git a/lib/cl/libopencl.jl b/lib/cl/libopencl.jl index c906de7b..368d8cb4 100644 --- a/lib/cl/libopencl.jl +++ b/lib/cl/libopencl.jl @@ -4,9 +4,11 @@ end function check(f) - res = retry_reclaim(err -> err == CL_OUT_OF_RESOURCES || - err == CL_MEM_OBJECT_ALLOCATION_FAILURE || - err == CL_OUT_OF_HOST_MEMORY) do + res = retry_reclaim( + err -> err == CL_OUT_OF_RESOURCES || + err == CL_MEM_OBJECT_ALLOCATION_FAILURE || + err == CL_OUT_OF_HOST_MEMORY + ) do return f() end @@ -188,7 +190,7 @@ end const cl_image_format = _cl_image_format struct _cl_image_desc - data::NTuple{72,UInt8} + data::NTuple{72, UInt8} end function Base.getproperty(x::Ptr{_cl_image_desc}, f::Symbol) @@ -210,7 +212,7 @@ function Base.getproperty(x::_cl_image_desc, f::Symbol) r = Ref{_cl_image_desc}(x) ptr = Base.unsafe_convert(Ptr{_cl_image_desc}, r) fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) + return GC.@preserve r unsafe_load(fptr) end function Base.setproperty!(x::Ptr{_cl_image_desc}, f::Symbol, v) @@ -228,44 +230,60 @@ const cl_buffer_region = _cl_buffer_region struct _cl_name_version version::cl_version - name::NTuple{64,Cchar} + name::NTuple{64, Cchar} end const cl_name_version = _cl_name_version @checked function clGetPlatformIDs(num_entries, platforms, num_platforms) - @ccall libopencl.clGetPlatformIDs(num_entries::cl_uint, platforms::Ptr{cl_platform_id}, - num_platforms::Ptr{cl_uint})::cl_int + @ccall libopencl.clGetPlatformIDs( + num_entries::cl_uint, platforms::Ptr{cl_platform_id}, + num_platforms::Ptr{cl_uint} + )::cl_int end -@checked function clGetPlatformInfo(platform, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetPlatformInfo(platform::cl_platform_id, - param_name::cl_platform_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetPlatformInfo( + platform, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetPlatformInfo( + platform::cl_platform_id, + param_name::cl_platform_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clGetDeviceIDs(platform, device_type, num_entries, devices, num_devices) - @ccall libopencl.clGetDeviceIDs(platform::cl_platform_id, device_type::cl_device_type, - num_entries::cl_uint, devices::Ptr{cl_device_id}, - num_devices::Ptr{cl_uint})::cl_int -end - -@checked function clGetDeviceInfo(device, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetDeviceInfo(device::cl_device_id, param_name::cl_device_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clCreateSubDevices(in_device, properties, num_devices, out_devices, - num_devices_ret) - @ccall libopencl.clCreateSubDevices(in_device::cl_device_id, - properties::Ptr{cl_device_partition_property}, - num_devices::cl_uint, - out_devices::Ptr{cl_device_id}, - num_devices_ret::Ptr{cl_uint})::cl_int + @ccall libopencl.clGetDeviceIDs( + platform::cl_platform_id, device_type::cl_device_type, + num_entries::cl_uint, devices::Ptr{cl_device_id}, + num_devices::Ptr{cl_uint} + )::cl_int +end + +@checked function clGetDeviceInfo( + device, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetDeviceInfo( + device::cl_device_id, param_name::cl_device_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clCreateSubDevices( + in_device, properties, num_devices, out_devices, + num_devices_ret + ) + @ccall libopencl.clCreateSubDevices( + in_device::cl_device_id, + properties::Ptr{cl_device_partition_property}, + num_devices::cl_uint, + out_devices::Ptr{cl_device_id}, + num_devices_ret::Ptr{cl_uint} + )::cl_int end @checked function clRetainDevice(device) @@ -277,36 +295,50 @@ end end @checked function clSetDefaultDeviceCommandQueue(context, device, command_queue) - @ccall libopencl.clSetDefaultDeviceCommandQueue(context::cl_context, - device::cl_device_id, - command_queue::cl_command_queue)::cl_int + @ccall libopencl.clSetDefaultDeviceCommandQueue( + context::cl_context, + device::cl_device_id, + command_queue::cl_command_queue + )::cl_int end @checked function clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp) - @ccall libopencl.clGetDeviceAndHostTimer(device::cl_device_id, - device_timestamp::Ptr{cl_ulong}, - host_timestamp::Ptr{cl_ulong})::cl_int + @ccall libopencl.clGetDeviceAndHostTimer( + device::cl_device_id, + device_timestamp::Ptr{cl_ulong}, + host_timestamp::Ptr{cl_ulong} + )::cl_int end @checked function clGetHostTimer(device, host_timestamp) - @ccall libopencl.clGetHostTimer(device::cl_device_id, - host_timestamp::Ptr{cl_ulong})::cl_int -end - -function clCreateContext(properties, num_devices, devices, pfn_notify, user_data, - errcode_ret) - @ccall libopencl.clCreateContext(properties::Ptr{cl_context_properties}, - num_devices::cl_uint, devices::Ptr{cl_device_id}, - pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_context -end - -function clCreateContextFromType(properties, device_type, pfn_notify, user_data, - errcode_ret) - @ccall libopencl.clCreateContextFromType(properties::Ptr{cl_context_properties}, - device_type::cl_device_type, - pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_context + @ccall libopencl.clGetHostTimer( + device::cl_device_id, + host_timestamp::Ptr{cl_ulong} + )::cl_int +end + +function clCreateContext( + properties, num_devices, devices, pfn_notify, user_data, + errcode_ret + ) + return @ccall libopencl.clCreateContext( + properties::Ptr{cl_context_properties}, + num_devices::cl_uint, devices::Ptr{cl_device_id}, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_context +end + +function clCreateContextFromType( + properties, device_type, pfn_notify, user_data, + errcode_ret + ) + return @ccall libopencl.clCreateContextFromType( + properties::Ptr{cl_context_properties}, + device_type::cl_device_type, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_context end @checked function clRetainContext(context) @@ -317,24 +349,32 @@ end @ccall libopencl.clReleaseContext(context::cl_context)::cl_int end -@checked function clGetContextInfo(context, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetContextInfo(context::cl_context, param_name::cl_context_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetContextInfo( + context, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetContextInfo( + context::cl_context, param_name::cl_context_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clSetContextDestructorCallback(context, pfn_notify, user_data) - @ccall libopencl.clSetContextDestructorCallback(context::cl_context, - pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetContextDestructorCallback( + context::cl_context, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid} + )::cl_int end function clCreateCommandQueueWithProperties(context, device, properties, errcode_ret) - @ccall libopencl.clCreateCommandQueueWithProperties(context::cl_context, - device::cl_device_id, - properties::Ptr{cl_queue_properties}, - errcode_ret::Ptr{cl_int})::cl_command_queue + return @ccall libopencl.clCreateCommandQueueWithProperties( + context::cl_context, + device::cl_device_id, + properties::Ptr{cl_queue_properties}, + errcode_ret::Ptr{cl_int} + )::cl_command_queue end @checked function clRetainCommandQueue(command_queue) @@ -345,61 +385,85 @@ end @ccall libopencl.clReleaseCommandQueue(command_queue::cl_command_queue)::cl_int end -@checked function clGetCommandQueueInfo(command_queue, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetCommandQueueInfo(command_queue::cl_command_queue, - param_name::cl_command_queue_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetCommandQueueInfo( + command_queue, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetCommandQueueInfo( + command_queue::cl_command_queue, + param_name::cl_command_queue_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end function clCreateBuffer(context, flags, size, host_ptr, errcode_ret) - @ccall libopencl.clCreateBuffer(context::cl_context, flags::cl_mem_flags, size::Csize_t, - host_ptr::Ptr{Cvoid}, errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateBuffer( + context::cl_context, flags::cl_mem_flags, size::Csize_t, + host_ptr::Ptr{Cvoid}, errcode_ret::Ptr{cl_int} + )::cl_mem end -function clCreateSubBuffer(buffer, flags, buffer_create_type, buffer_create_info, - errcode_ret) - @ccall libopencl.clCreateSubBuffer(buffer::cl_mem, flags::cl_mem_flags, - buffer_create_type::cl_buffer_create_type, - buffer_create_info::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem +function clCreateSubBuffer( + buffer, flags, buffer_create_type, buffer_create_info, + errcode_ret + ) + return @ccall libopencl.clCreateSubBuffer( + buffer::cl_mem, flags::cl_mem_flags, + buffer_create_type::cl_buffer_create_type, + buffer_create_info::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem end function clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret) - @ccall libopencl.clCreateImage(context::cl_context, flags::cl_mem_flags, - image_format::Ptr{cl_image_format}, - image_desc::Ptr{cl_image_desc}, host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem -end - -function clCreatePipe(context, flags, pipe_packet_size, pipe_max_packets, properties, - errcode_ret) - @ccall libopencl.clCreatePipe(context::cl_context, flags::cl_mem_flags, - pipe_packet_size::cl_uint, pipe_max_packets::cl_uint, - properties::Ptr{cl_pipe_properties}, - errcode_ret::Ptr{cl_int})::cl_mem -end - -function clCreateBufferWithProperties(context, properties, flags, size, host_ptr, - errcode_ret) - @ccall libopencl.clCreateBufferWithProperties(context::cl_context, - properties::Ptr{cl_mem_properties}, - flags::cl_mem_flags, size::Csize_t, - host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem -end - -function clCreateImageWithProperties(context, properties, flags, image_format, image_desc, - host_ptr, errcode_ret) - @ccall libopencl.clCreateImageWithProperties(context::cl_context, - properties::Ptr{cl_mem_properties}, - flags::cl_mem_flags, - image_format::Ptr{cl_image_format}, - image_desc::Ptr{cl_image_desc}, - host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateImage( + context::cl_context, flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_desc::Ptr{cl_image_desc}, host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem +end + +function clCreatePipe( + context, flags, pipe_packet_size, pipe_max_packets, properties, + errcode_ret + ) + return @ccall libopencl.clCreatePipe( + context::cl_context, flags::cl_mem_flags, + pipe_packet_size::cl_uint, pipe_max_packets::cl_uint, + properties::Ptr{cl_pipe_properties}, + errcode_ret::Ptr{cl_int} + )::cl_mem +end + +function clCreateBufferWithProperties( + context, properties, flags, size, host_ptr, + errcode_ret + ) + return @ccall libopencl.clCreateBufferWithProperties( + context::cl_context, + properties::Ptr{cl_mem_properties}, + flags::cl_mem_flags, size::Csize_t, + host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem +end + +function clCreateImageWithProperties( + context, properties, flags, image_format, image_desc, + host_ptr, errcode_ret + ) + return @ccall libopencl.clCreateImageWithProperties( + context::cl_context, + properties::Ptr{cl_mem_properties}, + flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_desc::Ptr{cl_image_desc}, + host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem end @checked function clRetainMemObject(memobj) @@ -410,55 +474,77 @@ end @ccall libopencl.clReleaseMemObject(memobj::cl_mem)::cl_int end -@checked function clGetSupportedImageFormats(context, flags, image_type, num_entries, - image_formats, num_image_formats) - @ccall libopencl.clGetSupportedImageFormats(context::cl_context, flags::cl_mem_flags, - image_type::cl_mem_object_type, - num_entries::cl_uint, - image_formats::Ptr{cl_image_format}, - num_image_formats::Ptr{cl_uint})::cl_int -end - -@checked function clGetMemObjectInfo(memobj, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetMemObjectInfo(memobj::cl_mem, param_name::cl_mem_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clGetImageInfo(image, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetImageInfo(image::cl_mem, param_name::cl_image_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clGetPipeInfo(pipe, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetPipeInfo(pipe::cl_mem, param_name::cl_pipe_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetSupportedImageFormats( + context, flags, image_type, num_entries, + image_formats, num_image_formats + ) + @ccall libopencl.clGetSupportedImageFormats( + context::cl_context, flags::cl_mem_flags, + image_type::cl_mem_object_type, + num_entries::cl_uint, + image_formats::Ptr{cl_image_format}, + num_image_formats::Ptr{cl_uint} + )::cl_int +end + +@checked function clGetMemObjectInfo( + memobj, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetMemObjectInfo( + memobj::cl_mem, param_name::cl_mem_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clGetImageInfo( + image, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetImageInfo( + image::cl_mem, param_name::cl_image_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clGetPipeInfo( + pipe, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetPipeInfo( + pipe::cl_mem, param_name::cl_pipe_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clSetMemObjectDestructorCallback(memobj, pfn_notify, user_data) - @ccall libopencl.clSetMemObjectDestructorCallback(memobj::cl_mem, - pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetMemObjectDestructorCallback( + memobj::cl_mem, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid} + )::cl_int end function clSVMAlloc(context, flags, size, alignment) - @ccall libopencl.clSVMAlloc(context::cl_context, flags::cl_svm_mem_flags, size::Csize_t, - alignment::cl_uint)::Ptr{Cvoid} + return @ccall libopencl.clSVMAlloc( + context::cl_context, flags::cl_svm_mem_flags, size::Csize_t, + alignment::cl_uint + )::Ptr{Cvoid} end function clSVMFree(context, svm_pointer) - @ccall libopencl.clSVMFree(context::cl_context, svm_pointer::Ptr{Cvoid})::Cvoid + return @ccall libopencl.clSVMFree(context::cl_context, svm_pointer::Ptr{Cvoid})::Cvoid end function clCreateSamplerWithProperties(context, sampler_properties, errcode_ret) - @ccall libopencl.clCreateSamplerWithProperties(context::cl_context, - sampler_properties::Ptr{cl_sampler_properties}, - errcode_ret::Ptr{cl_int})::cl_sampler + return @ccall libopencl.clCreateSamplerWithProperties( + context::cl_context, + sampler_properties::Ptr{cl_sampler_properties}, + errcode_ret::Ptr{cl_int} + )::cl_sampler end @checked function clRetainSampler(sampler) @@ -469,43 +555,59 @@ end @ccall libopencl.clReleaseSampler(sampler::cl_sampler)::cl_int end -@checked function clGetSamplerInfo(sampler, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetSamplerInfo(sampler::cl_sampler, param_name::cl_sampler_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetSamplerInfo( + sampler, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetSamplerInfo( + sampler::cl_sampler, param_name::cl_sampler_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end function clCreateProgramWithSource(context, count, strings, lengths, errcode_ret) - @ccall libopencl.clCreateProgramWithSource(context::cl_context, count::cl_uint, - strings::Ptr{Ptr{Cchar}}, - lengths::Ptr{Csize_t}, - errcode_ret::Ptr{cl_int})::cl_program -end - -function clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, - binary_status, errcode_ret) - @ccall libopencl.clCreateProgramWithBinary(context::cl_context, num_devices::cl_uint, - device_list::Ptr{cl_device_id}, - lengths::Ptr{Csize_t}, - binaries::Ptr{Ptr{Cuchar}}, - binary_status::Ptr{cl_int}, - errcode_ret::Ptr{cl_int})::cl_program -end - -function clCreateProgramWithBuiltInKernels(context, num_devices, device_list, kernel_names, - errcode_ret) - @ccall libopencl.clCreateProgramWithBuiltInKernels(context::cl_context, - num_devices::cl_uint, - device_list::Ptr{cl_device_id}, - kernel_names::Ptr{Cchar}, - errcode_ret::Ptr{cl_int})::cl_program + return @ccall libopencl.clCreateProgramWithSource( + context::cl_context, count::cl_uint, + strings::Ptr{Ptr{Cchar}}, + lengths::Ptr{Csize_t}, + errcode_ret::Ptr{cl_int} + )::cl_program +end + +function clCreateProgramWithBinary( + context, num_devices, device_list, lengths, binaries, + binary_status, errcode_ret + ) + return @ccall libopencl.clCreateProgramWithBinary( + context::cl_context, num_devices::cl_uint, + device_list::Ptr{cl_device_id}, + lengths::Ptr{Csize_t}, + binaries::Ptr{Ptr{Cuchar}}, + binary_status::Ptr{cl_int}, + errcode_ret::Ptr{cl_int} + )::cl_program +end + +function clCreateProgramWithBuiltInKernels( + context, num_devices, device_list, kernel_names, + errcode_ret + ) + return @ccall libopencl.clCreateProgramWithBuiltInKernels( + context::cl_context, + num_devices::cl_uint, + device_list::Ptr{cl_device_id}, + kernel_names::Ptr{Cchar}, + errcode_ret::Ptr{cl_int} + )::cl_program end function clCreateProgramWithIL(context, il, length, errcode_ret) - @ccall libopencl.clCreateProgramWithIL(context::cl_context, il::Ptr{Cvoid}, - length::Csize_t, - errcode_ret::Ptr{cl_int})::cl_program + return @ccall libopencl.clCreateProgramWithIL( + context::cl_context, il::Ptr{Cvoid}, + length::Csize_t, + errcode_ret::Ptr{cl_int} + )::cl_program end @checked function clRetainProgram(program) @@ -516,82 +618,114 @@ end @ccall libopencl.clReleaseProgram(program::cl_program)::cl_int end -@checked function clBuildProgram(program, num_devices, device_list, options, pfn_notify, - user_data) - @ccall libopencl.clBuildProgram(program::cl_program, num_devices::cl_uint, - device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, - pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid})::cl_int -end - -@checked function clCompileProgram(program, num_devices, device_list, options, - num_input_headers, input_headers, header_include_names, - pfn_notify, user_data) - @ccall libopencl.clCompileProgram(program::cl_program, num_devices::cl_uint, - device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, - num_input_headers::cl_uint, - input_headers::Ptr{cl_program}, - header_include_names::Ptr{Ptr{Cchar}}, - pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid})::cl_int -end - -function clLinkProgram(context, num_devices, device_list, options, num_input_programs, - input_programs, pfn_notify, user_data, errcode_ret) - @ccall libopencl.clLinkProgram(context::cl_context, num_devices::cl_uint, - device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, - num_input_programs::cl_uint, - input_programs::Ptr{cl_program}, pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_program +@checked function clBuildProgram( + program, num_devices, device_list, options, pfn_notify, + user_data + ) + @ccall libopencl.clBuildProgram( + program::cl_program, num_devices::cl_uint, + device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid} + )::cl_int +end + +@checked function clCompileProgram( + program, num_devices, device_list, options, + num_input_headers, input_headers, header_include_names, + pfn_notify, user_data + ) + @ccall libopencl.clCompileProgram( + program::cl_program, num_devices::cl_uint, + device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, + num_input_headers::cl_uint, + input_headers::Ptr{cl_program}, + header_include_names::Ptr{Ptr{Cchar}}, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid} + )::cl_int +end + +function clLinkProgram( + context, num_devices, device_list, options, num_input_programs, + input_programs, pfn_notify, user_data, errcode_ret + ) + return @ccall libopencl.clLinkProgram( + context::cl_context, num_devices::cl_uint, + device_list::Ptr{cl_device_id}, options::Ptr{Cchar}, + num_input_programs::cl_uint, + input_programs::Ptr{cl_program}, pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_program end @checked function clSetProgramReleaseCallback(program, pfn_notify, user_data) - @ccall libopencl.clSetProgramReleaseCallback(program::cl_program, - pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetProgramReleaseCallback( + program::cl_program, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid} + )::cl_int end -@checked function clSetProgramSpecializationConstant(program, spec_id, spec_size, - spec_value) - @ccall libopencl.clSetProgramSpecializationConstant(program::cl_program, - spec_id::cl_uint, - spec_size::Csize_t, - spec_value::Ptr{Cvoid})::cl_int +@checked function clSetProgramSpecializationConstant( + program, spec_id, spec_size, + spec_value + ) + @ccall libopencl.clSetProgramSpecializationConstant( + program::cl_program, + spec_id::cl_uint, + spec_size::Csize_t, + spec_value::Ptr{Cvoid} + )::cl_int end @checked function clUnloadPlatformCompiler(platform) @ccall libopencl.clUnloadPlatformCompiler(platform::cl_platform_id)::cl_int end -@checked function clGetProgramInfo(program, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetProgramInfo(program::cl_program, param_name::cl_program_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetProgramInfo( + program, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetProgramInfo( + program::cl_program, param_name::cl_program_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end -@checked function clGetProgramBuildInfo(program, device, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetProgramBuildInfo(program::cl_program, device::cl_device_id, - param_name::cl_program_build_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetProgramBuildInfo( + program, device, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetProgramBuildInfo( + program::cl_program, device::cl_device_id, + param_name::cl_program_build_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end function clCreateKernel(program, kernel_name, errcode_ret) - @ccall libopencl.clCreateKernel(program::cl_program, kernel_name::Ptr{Cchar}, - errcode_ret::Ptr{cl_int})::cl_kernel + return @ccall libopencl.clCreateKernel( + program::cl_program, kernel_name::Ptr{Cchar}, + errcode_ret::Ptr{cl_int} + )::cl_kernel end @checked function clCreateKernelsInProgram(program, num_kernels, kernels, num_kernels_ret) - @ccall libopencl.clCreateKernelsInProgram(program::cl_program, num_kernels::cl_uint, - kernels::Ptr{cl_kernel}, - num_kernels_ret::Ptr{cl_uint})::cl_int + @ccall libopencl.clCreateKernelsInProgram( + program::cl_program, num_kernels::cl_uint, + kernels::Ptr{cl_kernel}, + num_kernels_ret::Ptr{cl_uint} + )::cl_int end function clCloneKernel(source_kernel, errcode_ret) - @ccall libopencl.clCloneKernel(source_kernel::cl_kernel, - errcode_ret::Ptr{cl_int})::cl_kernel + return @ccall libopencl.clCloneKernel( + source_kernel::cl_kernel, + errcode_ret::Ptr{cl_int} + )::cl_kernel end @checked function clRetainKernel(kernel) @@ -603,71 +737,99 @@ end end @checked function clSetKernelArg(kernel, arg_index, arg_size, arg_value) - @ccall libopencl.clSetKernelArg(kernel::cl_kernel, arg_index::cl_uint, - arg_size::Csize_t, arg_value::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetKernelArg( + kernel::cl_kernel, arg_index::cl_uint, + arg_size::Csize_t, arg_value::Ptr{Cvoid} + )::cl_int end @checked function clSetKernelArgSVMPointer(kernel, arg_index, arg_value) - @ccall libopencl.clSetKernelArgSVMPointer(kernel::cl_kernel, arg_index::cl_uint, - arg_value::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetKernelArgSVMPointer( + kernel::cl_kernel, arg_index::cl_uint, + arg_value::Ptr{Cvoid} + )::cl_int end @checked function clSetKernelExecInfo(kernel, param_name, param_value_size, param_value) - @ccall libopencl.clSetKernelExecInfo(kernel::cl_kernel, param_name::cl_kernel_exec_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid})::cl_int -end - -@checked function clGetKernelInfo(kernel, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetKernelInfo(kernel::cl_kernel, param_name::cl_kernel_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clGetKernelArgInfo(kernel, arg_indx, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetKernelArgInfo(kernel::cl_kernel, arg_indx::cl_uint, - param_name::cl_kernel_arg_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clGetKernelWorkGroupInfo(kernel, device, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetKernelWorkGroupInfo(kernel::cl_kernel, device::cl_device_id, - param_name::cl_kernel_work_group_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clGetKernelSubGroupInfo(kernel, device, param_name, input_value_size, - input_value, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetKernelSubGroupInfo(kernel::cl_kernel, device::cl_device_id, - param_name::cl_kernel_sub_group_info, - input_value_size::Csize_t, - input_value::Ptr{Cvoid}, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int + @ccall libopencl.clSetKernelExecInfo( + kernel::cl_kernel, param_name::cl_kernel_exec_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid} + )::cl_int +end + +@checked function clGetKernelInfo( + kernel, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetKernelInfo( + kernel::cl_kernel, param_name::cl_kernel_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clGetKernelArgInfo( + kernel, arg_indx, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetKernelArgInfo( + kernel::cl_kernel, arg_indx::cl_uint, + param_name::cl_kernel_arg_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clGetKernelWorkGroupInfo( + kernel, device, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetKernelWorkGroupInfo( + kernel::cl_kernel, device::cl_device_id, + param_name::cl_kernel_work_group_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clGetKernelSubGroupInfo( + kernel, device, param_name, input_value_size, + input_value, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetKernelSubGroupInfo( + kernel::cl_kernel, device::cl_device_id, + param_name::cl_kernel_sub_group_info, + input_value_size::Csize_t, + input_value::Ptr{Cvoid}, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clWaitForEvents(num_events, event_list) @ccall libopencl.clWaitForEvents(num_events::cl_uint, event_list::Ptr{cl_event})::cl_int end -@checked function clGetEventInfo(event, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetEventInfo(event::cl_event, param_name::cl_event_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetEventInfo( + event, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetEventInfo( + event::cl_event, param_name::cl_event_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end function clCreateUserEvent(context, errcode_ret) - @ccall libopencl.clCreateUserEvent(context::cl_context, - errcode_ret::Ptr{cl_int})::cl_event + return @ccall libopencl.clCreateUserEvent( + context::cl_context, + errcode_ret::Ptr{cl_int} + )::cl_event end @checked function clRetainEvent(event) @@ -682,19 +844,27 @@ end @ccall libopencl.clSetUserEventStatus(event::cl_event, execution_status::cl_int)::cl_int end -@checked function clSetEventCallback(event, command_exec_callback_type, pfn_notify, - user_data) - @ccall libopencl.clSetEventCallback(event::cl_event, command_exec_callback_type::cl_int, - pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid})::cl_int +@checked function clSetEventCallback( + event, command_exec_callback_type, pfn_notify, + user_data + ) + @ccall libopencl.clSetEventCallback( + event::cl_event, command_exec_callback_type::cl_int, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid} + )::cl_int end -@checked function clGetEventProfilingInfo(event, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetEventProfilingInfo(event::cl_event, param_name::cl_profiling_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetEventProfilingInfo( + event, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetEventProfilingInfo( + event::cl_event, param_name::cl_profiling_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clFlush(command_queue) @@ -705,358 +875,480 @@ end @ccall libopencl.clFinish(command_queue::cl_command_queue)::cl_int end -@checked function clEnqueueReadBuffer(command_queue, buffer, blocking_read, offset, size, - ptr, num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueReadBuffer(command_queue::cl_command_queue, buffer::cl_mem, - blocking_read::cl_bool, offset::Csize_t, - size::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueReadBufferRect(command_queue, buffer, blocking_read, - buffer_origin, host_origin, region, - buffer_row_pitch, buffer_slice_pitch, - host_row_pitch, host_slice_pitch, ptr, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueReadBufferRect(command_queue::cl_command_queue, - buffer::cl_mem, blocking_read::cl_bool, - buffer_origin::Ptr{Csize_t}, - host_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - buffer_row_pitch::Csize_t, - buffer_slice_pitch::Csize_t, - host_row_pitch::Csize_t, - host_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueWriteBuffer(command_queue, buffer, blocking_write, offset, size, - ptr, num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueWriteBuffer(command_queue::cl_command_queue, buffer::cl_mem, - blocking_write::cl_bool, offset::Csize_t, - size::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueWriteBufferRect(command_queue, buffer, blocking_write, - buffer_origin, host_origin, region, - buffer_row_pitch, buffer_slice_pitch, - host_row_pitch, host_slice_pitch, ptr, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueWriteBufferRect(command_queue::cl_command_queue, - buffer::cl_mem, blocking_write::cl_bool, - buffer_origin::Ptr{Csize_t}, - host_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - buffer_row_pitch::Csize_t, - buffer_slice_pitch::Csize_t, - host_row_pitch::Csize_t, - host_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueFillBuffer(command_queue, buffer, pattern, pattern_size, offset, - size, num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueFillBuffer(command_queue::cl_command_queue, buffer::cl_mem, - pattern::Ptr{Cvoid}, pattern_size::Csize_t, - offset::Csize_t, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueCopyBuffer(command_queue, src_buffer, dst_buffer, src_offset, - dst_offset, size, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueCopyBuffer(command_queue::cl_command_queue, - src_buffer::cl_mem, dst_buffer::cl_mem, - src_offset::Csize_t, dst_offset::Csize_t, - size::Csize_t, num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueCopyBufferRect(command_queue, src_buffer, dst_buffer, src_origin, - dst_origin, region, src_row_pitch, - src_slice_pitch, dst_row_pitch, dst_slice_pitch, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueCopyBufferRect(command_queue::cl_command_queue, - src_buffer::cl_mem, dst_buffer::cl_mem, - src_origin::Ptr{Csize_t}, - dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, - src_row_pitch::Csize_t, - src_slice_pitch::Csize_t, - dst_row_pitch::Csize_t, - dst_slice_pitch::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueReadImage(command_queue, image, blocking_read, origin, region, - row_pitch, slice_pitch, ptr, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueReadImage(command_queue::cl_command_queue, image::cl_mem, - blocking_read::cl_bool, origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, row_pitch::Csize_t, - slice_pitch::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueWriteImage(command_queue, image, blocking_write, origin, region, - input_row_pitch, input_slice_pitch, ptr, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueWriteImage(command_queue::cl_command_queue, image::cl_mem, - blocking_write::cl_bool, origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, input_row_pitch::Csize_t, - input_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueFillImage(command_queue, image, fill_color, origin, region, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueFillImage(command_queue::cl_command_queue, image::cl_mem, - fill_color::Ptr{Cvoid}, origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueCopyImage(command_queue, src_image, dst_image, src_origin, - dst_origin, region, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueCopyImage(command_queue::cl_command_queue, src_image::cl_mem, - dst_image::cl_mem, src_origin::Ptr{Csize_t}, - dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueCopyImageToBuffer(command_queue, src_image, dst_buffer, - src_origin, region, dst_offset, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueCopyImageToBuffer(command_queue::cl_command_queue, - src_image::cl_mem, dst_buffer::cl_mem, - src_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, dst_offset::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueCopyBufferToImage(command_queue, src_buffer, dst_image, - src_offset, dst_origin, region, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueCopyBufferToImage(command_queue::cl_command_queue, - src_buffer::cl_mem, dst_image::cl_mem, - src_offset::Csize_t, - dst_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -function clEnqueueMapBuffer(command_queue, buffer, blocking_map, map_flags, offset, size, - num_events_in_wait_list, event_wait_list, event, errcode_ret) - @ccall libopencl.clEnqueueMapBuffer(command_queue::cl_command_queue, buffer::cl_mem, - blocking_map::cl_bool, map_flags::cl_map_flags, - offset::Csize_t, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event}, - errcode_ret::Ptr{cl_int})::Ptr{Cvoid} -end - -function clEnqueueMapImage(command_queue, image, blocking_map, map_flags, origin, region, - image_row_pitch, image_slice_pitch, num_events_in_wait_list, - event_wait_list, event, errcode_ret) - @ccall libopencl.clEnqueueMapImage(command_queue::cl_command_queue, image::cl_mem, - blocking_map::cl_bool, map_flags::cl_map_flags, - origin::Ptr{Csize_t}, region::Ptr{Csize_t}, - image_row_pitch::Ptr{Csize_t}, - image_slice_pitch::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, event::Ptr{cl_event}, - errcode_ret::Ptr{cl_int})::Ptr{Cvoid} -end - -@checked function clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueUnmapMemObject(command_queue::cl_command_queue, - memobj::cl_mem, mapped_ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueMigrateMemObjects(command_queue, num_mem_objects, mem_objects, - flags, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueMigrateMemObjects(command_queue::cl_command_queue, - num_mem_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - flags::cl_mem_migration_flags, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueNDRangeKernel(command_queue, kernel, work_dim, - global_work_offset, global_work_size, - local_work_size, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueNDRangeKernel(command_queue::cl_command_queue, - kernel::cl_kernel, work_dim::cl_uint, - global_work_offset::Ptr{Csize_t}, - global_work_size::Ptr{Csize_t}, - local_work_size::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueNativeKernel(command_queue, user_func, args, cb_args, - num_mem_objects, mem_list, args_mem_loc, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueNativeKernel(command_queue::cl_command_queue, - user_func::Ptr{Cvoid}, args::Ptr{Cvoid}, - cb_args::Csize_t, num_mem_objects::cl_uint, - mem_list::Ptr{cl_mem}, - args_mem_loc::Ptr{Ptr{Cvoid}}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueMarkerWithWaitList(command_queue::cl_command_queue, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueBarrierWithWaitList(command_queue, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueBarrierWithWaitList(command_queue::cl_command_queue, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMFree(command_queue, num_svm_pointers, svm_pointers, - pfn_free_func, user_data, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueSVMFree(command_queue::cl_command_queue, - num_svm_pointers::cl_uint, - svm_pointers::Ptr{Ptr{Cvoid}}, - pfn_free_func::Ptr{Cvoid}, user_data::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMemcpy(command_queue, blocking_copy, dst_ptr, src_ptr, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueSVMMemcpy(command_queue::cl_command_queue, - blocking_copy::cl_bool, dst_ptr::Ptr{Cvoid}, - src_ptr::Ptr{Cvoid}, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMemFill(command_queue, svm_ptr, pattern, pattern_size, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueSVMMemFill(command_queue::cl_command_queue, - svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, - pattern_size::Csize_t, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMap(command_queue, blocking_map, flags, svm_ptr, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueSVMMap(command_queue::cl_command_queue, blocking_map::cl_bool, - flags::cl_map_flags, svm_ptr::Ptr{Cvoid}, - size::Csize_t, num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMUnmap(command_queue, svm_ptr, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueSVMUnmap(command_queue::cl_command_queue, svm_ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMigrateMem(command_queue, num_svm_pointers, svm_pointers, - sizes, flags, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueSVMMigrateMem(command_queue::cl_command_queue, - num_svm_pointers::cl_uint, - svm_pointers::Ptr{Ptr{Cvoid}}, - sizes::Ptr{Csize_t}, - flags::cl_mem_migration_flags, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueReadBuffer( + command_queue, buffer, blocking_read, offset, size, + ptr, num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueReadBuffer( + command_queue::cl_command_queue, buffer::cl_mem, + blocking_read::cl_bool, offset::Csize_t, + size::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueReadBufferRect( + command_queue, buffer, blocking_read, + buffer_origin, host_origin, region, + buffer_row_pitch, buffer_slice_pitch, + host_row_pitch, host_slice_pitch, ptr, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueReadBufferRect( + command_queue::cl_command_queue, + buffer::cl_mem, blocking_read::cl_bool, + buffer_origin::Ptr{Csize_t}, + host_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + buffer_row_pitch::Csize_t, + buffer_slice_pitch::Csize_t, + host_row_pitch::Csize_t, + host_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueWriteBuffer( + command_queue, buffer, blocking_write, offset, size, + ptr, num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueWriteBuffer( + command_queue::cl_command_queue, buffer::cl_mem, + blocking_write::cl_bool, offset::Csize_t, + size::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueWriteBufferRect( + command_queue, buffer, blocking_write, + buffer_origin, host_origin, region, + buffer_row_pitch, buffer_slice_pitch, + host_row_pitch, host_slice_pitch, ptr, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueWriteBufferRect( + command_queue::cl_command_queue, + buffer::cl_mem, blocking_write::cl_bool, + buffer_origin::Ptr{Csize_t}, + host_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + buffer_row_pitch::Csize_t, + buffer_slice_pitch::Csize_t, + host_row_pitch::Csize_t, + host_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueFillBuffer( + command_queue, buffer, pattern, pattern_size, offset, + size, num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueFillBuffer( + command_queue::cl_command_queue, buffer::cl_mem, + pattern::Ptr{Cvoid}, pattern_size::Csize_t, + offset::Csize_t, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueCopyBuffer( + command_queue, src_buffer, dst_buffer, src_offset, + dst_offset, size, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueCopyBuffer( + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_buffer::cl_mem, + src_offset::Csize_t, dst_offset::Csize_t, + size::Csize_t, num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueCopyBufferRect( + command_queue, src_buffer, dst_buffer, src_origin, + dst_origin, region, src_row_pitch, + src_slice_pitch, dst_row_pitch, dst_slice_pitch, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueCopyBufferRect( + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_buffer::cl_mem, + src_origin::Ptr{Csize_t}, + dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, + src_row_pitch::Csize_t, + src_slice_pitch::Csize_t, + dst_row_pitch::Csize_t, + dst_slice_pitch::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueReadImage( + command_queue, image, blocking_read, origin, region, + row_pitch, slice_pitch, ptr, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueReadImage( + command_queue::cl_command_queue, image::cl_mem, + blocking_read::cl_bool, origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, row_pitch::Csize_t, + slice_pitch::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueWriteImage( + command_queue, image, blocking_write, origin, region, + input_row_pitch, input_slice_pitch, ptr, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueWriteImage( + command_queue::cl_command_queue, image::cl_mem, + blocking_write::cl_bool, origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, input_row_pitch::Csize_t, + input_slice_pitch::Csize_t, ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueFillImage( + command_queue, image, fill_color, origin, region, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueFillImage( + command_queue::cl_command_queue, image::cl_mem, + fill_color::Ptr{Cvoid}, origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueCopyImage( + command_queue, src_image, dst_image, src_origin, + dst_origin, region, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueCopyImage( + command_queue::cl_command_queue, src_image::cl_mem, + dst_image::cl_mem, src_origin::Ptr{Csize_t}, + dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueCopyImageToBuffer( + command_queue, src_image, dst_buffer, + src_origin, region, dst_offset, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueCopyImageToBuffer( + command_queue::cl_command_queue, + src_image::cl_mem, dst_buffer::cl_mem, + src_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, dst_offset::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueCopyBufferToImage( + command_queue, src_buffer, dst_image, + src_offset, dst_origin, region, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueCopyBufferToImage( + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_image::cl_mem, + src_offset::Csize_t, + dst_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +function clEnqueueMapBuffer( + command_queue, buffer, blocking_map, map_flags, offset, size, + num_events_in_wait_list, event_wait_list, event, errcode_ret + ) + return @ccall libopencl.clEnqueueMapBuffer( + command_queue::cl_command_queue, buffer::cl_mem, + blocking_map::cl_bool, map_flags::cl_map_flags, + offset::Csize_t, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event}, + errcode_ret::Ptr{cl_int} + )::Ptr{Cvoid} +end + +function clEnqueueMapImage( + command_queue, image, blocking_map, map_flags, origin, region, + image_row_pitch, image_slice_pitch, num_events_in_wait_list, + event_wait_list, event, errcode_ret + ) + return @ccall libopencl.clEnqueueMapImage( + command_queue::cl_command_queue, image::cl_mem, + blocking_map::cl_bool, map_flags::cl_map_flags, + origin::Ptr{Csize_t}, region::Ptr{Csize_t}, + image_row_pitch::Ptr{Csize_t}, + image_slice_pitch::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, event::Ptr{cl_event}, + errcode_ret::Ptr{cl_int} + )::Ptr{Cvoid} +end + +@checked function clEnqueueUnmapMemObject( + command_queue, memobj, mapped_ptr, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueUnmapMemObject( + command_queue::cl_command_queue, + memobj::cl_mem, mapped_ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueMigrateMemObjects( + command_queue, num_mem_objects, mem_objects, + flags, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueMigrateMemObjects( + command_queue::cl_command_queue, + num_mem_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + flags::cl_mem_migration_flags, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueNDRangeKernel( + command_queue, kernel, work_dim, + global_work_offset, global_work_size, + local_work_size, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueNDRangeKernel( + command_queue::cl_command_queue, + kernel::cl_kernel, work_dim::cl_uint, + global_work_offset::Ptr{Csize_t}, + global_work_size::Ptr{Csize_t}, + local_work_size::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueNativeKernel( + command_queue, user_func, args, cb_args, + num_mem_objects, mem_list, args_mem_loc, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueNativeKernel( + command_queue::cl_command_queue, + user_func::Ptr{Cvoid}, args::Ptr{Cvoid}, + cb_args::Csize_t, num_mem_objects::cl_uint, + mem_list::Ptr{cl_mem}, + args_mem_loc::Ptr{Ptr{Cvoid}}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueMarkerWithWaitList( + command_queue, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueMarkerWithWaitList( + command_queue::cl_command_queue, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueBarrierWithWaitList( + command_queue, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueBarrierWithWaitList( + command_queue::cl_command_queue, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMFree( + command_queue, num_svm_pointers, svm_pointers, + pfn_free_func, user_data, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMFree( + command_queue::cl_command_queue, + num_svm_pointers::cl_uint, + svm_pointers::Ptr{Ptr{Cvoid}}, + pfn_free_func::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMemcpy( + command_queue, blocking_copy, dst_ptr, src_ptr, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMMemcpy( + command_queue::cl_command_queue, + blocking_copy::cl_bool, dst_ptr::Ptr{Cvoid}, + src_ptr::Ptr{Cvoid}, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMemFill( + command_queue, svm_ptr, pattern, pattern_size, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMMemFill( + command_queue::cl_command_queue, + svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, + pattern_size::Csize_t, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMap( + command_queue, blocking_map, flags, svm_ptr, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMMap( + command_queue::cl_command_queue, blocking_map::cl_bool, + flags::cl_map_flags, svm_ptr::Ptr{Cvoid}, + size::Csize_t, num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMUnmap( + command_queue, svm_ptr, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMUnmap( + command_queue::cl_command_queue, svm_ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMigrateMem( + command_queue, num_svm_pointers, svm_pointers, + sizes, flags, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMMigrateMem( + command_queue::cl_command_queue, + num_svm_pointers::cl_uint, + svm_pointers::Ptr{Ptr{Cvoid}}, + sizes::Ptr{Csize_t}, + flags::cl_mem_migration_flags, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end function clGetExtensionFunctionAddressForPlatform(platform, func_name) - @ccall libopencl.clGetExtensionFunctionAddressForPlatform(platform::cl_platform_id, - func_name::Ptr{Cchar})::Ptr{Cvoid} -end - -function clCreateImage2D(context, flags, image_format, image_width, image_height, - image_row_pitch, host_ptr, errcode_ret) - @ccall libopencl.clCreateImage2D(context::cl_context, flags::cl_mem_flags, - image_format::Ptr{cl_image_format}, - image_width::Csize_t, image_height::Csize_t, - image_row_pitch::Csize_t, host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem -end - -function clCreateImage3D(context, flags, image_format, image_width, image_height, - image_depth, image_row_pitch, image_slice_pitch, host_ptr, - errcode_ret) - @ccall libopencl.clCreateImage3D(context::cl_context, flags::cl_mem_flags, - image_format::Ptr{cl_image_format}, - image_width::Csize_t, image_height::Csize_t, - image_depth::Csize_t, image_row_pitch::Csize_t, - image_slice_pitch::Csize_t, host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clGetExtensionFunctionAddressForPlatform( + platform::cl_platform_id, + func_name::Ptr{Cchar} + )::Ptr{Cvoid} +end + +function clCreateImage2D( + context, flags, image_format, image_width, image_height, + image_row_pitch, host_ptr, errcode_ret + ) + return @ccall libopencl.clCreateImage2D( + context::cl_context, flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_width::Csize_t, image_height::Csize_t, + image_row_pitch::Csize_t, host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem +end + +function clCreateImage3D( + context, flags, image_format, image_width, image_height, + image_depth, image_row_pitch, image_slice_pitch, host_ptr, + errcode_ret + ) + return @ccall libopencl.clCreateImage3D( + context::cl_context, flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_width::Csize_t, image_height::Csize_t, + image_depth::Csize_t, image_row_pitch::Csize_t, + image_slice_pitch::Csize_t, host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem end @checked function clEnqueueMarker(command_queue, event) - @ccall libopencl.clEnqueueMarker(command_queue::cl_command_queue, - event::Ptr{cl_event})::cl_int + @ccall libopencl.clEnqueueMarker( + command_queue::cl_command_queue, + event::Ptr{cl_event} + )::cl_int end @checked function clEnqueueWaitForEvents(command_queue, num_events, event_list) - @ccall libopencl.clEnqueueWaitForEvents(command_queue::cl_command_queue, - num_events::cl_uint, - event_list::Ptr{cl_event})::cl_int + @ccall libopencl.clEnqueueWaitForEvents( + command_queue::cl_command_queue, + num_events::cl_uint, + event_list::Ptr{cl_event} + )::cl_int end @checked function clEnqueueBarrier(command_queue) @@ -1068,29 +1360,39 @@ end end function clGetExtensionFunctionAddress(func_name) - @ccall libopencl.clGetExtensionFunctionAddress(func_name::Ptr{Cchar})::Ptr{Cvoid} + return @ccall libopencl.clGetExtensionFunctionAddress(func_name::Ptr{Cchar})::Ptr{Cvoid} end function clCreateCommandQueue(context, device, properties, errcode_ret) - @ccall libopencl.clCreateCommandQueue(context::cl_context, device::cl_device_id, - properties::cl_command_queue_properties, - errcode_ret::Ptr{cl_int})::cl_command_queue -end - -function clCreateSampler(context, normalized_coords, addressing_mode, filter_mode, - errcode_ret) - @ccall libopencl.clCreateSampler(context::cl_context, normalized_coords::cl_bool, - addressing_mode::cl_addressing_mode, - filter_mode::cl_filter_mode, - errcode_ret::Ptr{cl_int})::cl_sampler -end - -@checked function clEnqueueTask(command_queue, kernel, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueTask(command_queue::cl_command_queue, kernel::cl_kernel, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int + return @ccall libopencl.clCreateCommandQueue( + context::cl_context, device::cl_device_id, + properties::cl_command_queue_properties, + errcode_ret::Ptr{cl_int} + )::cl_command_queue +end + +function clCreateSampler( + context, normalized_coords, addressing_mode, filter_mode, + errcode_ret + ) + return @ccall libopencl.clCreateSampler( + context::cl_context, normalized_coords::cl_bool, + addressing_mode::cl_addressing_mode, + filter_mode::cl_filter_mode, + errcode_ret::Ptr{cl_int} + )::cl_sampler +end + +@checked function clEnqueueTask( + command_queue, kernel, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueTask( + command_queue::cl_command_queue, kernel::cl_kernel, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end const cl_gl_context_info = cl_uint @@ -1113,19 +1415,25 @@ const clCreateFromGLBuffer_t = Cvoid # typedef clCreateFromGLBuffer_t * clCreateFromGLBuffer_fn const clCreateFromGLBuffer_fn = Ptr{clCreateFromGLBuffer_t} -@checked function clGetGLContextInfoKHR(properties, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetGLContextInfoKHR(properties::Ptr{cl_context_properties}, - param_name::cl_gl_context_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetGLContextInfoKHR( + properties, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetGLContextInfoKHR( + properties::Ptr{cl_context_properties}, + param_name::cl_gl_context_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end function clCreateFromGLBuffer(context, flags, bufobj, errcode_ret) - @ccall libopencl.clCreateFromGLBuffer(context::cl_context, flags::cl_mem_flags, - bufobj::cl_GLuint, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateFromGLBuffer( + context::cl_context, flags::cl_mem_flags, + bufobj::cl_GLuint, + errcode_ret::Ptr{cl_int} + )::cl_mem end # typedef cl_mem CL_API_CALL clCreateFromGLTexture_t ( cl_context context , cl_mem_flags flags , cl_GLenum target , cl_GLint miplevel , cl_GLuint texture , cl_int * errcode_ret ) @@ -1135,10 +1443,12 @@ const clCreateFromGLTexture_t = Cvoid const clCreateFromGLTexture_fn = Ptr{clCreateFromGLTexture_t} function clCreateFromGLTexture(context, flags, target, miplevel, texture, errcode_ret) - @ccall libopencl.clCreateFromGLTexture(context::cl_context, flags::cl_mem_flags, - target::cl_GLenum, miplevel::cl_GLint, - texture::cl_GLuint, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateFromGLTexture( + context::cl_context, flags::cl_mem_flags, + target::cl_GLenum, miplevel::cl_GLint, + texture::cl_GLuint, + errcode_ret::Ptr{cl_int} + )::cl_mem end # typedef cl_mem CL_API_CALL clCreateFromGLRenderbuffer_t ( cl_context context , cl_mem_flags flags , cl_GLuint renderbuffer , cl_int * errcode_ret ) @@ -1172,42 +1482,58 @@ const clEnqueueReleaseGLObjects_t = Cvoid const clEnqueueReleaseGLObjects_fn = Ptr{clEnqueueReleaseGLObjects_t} function clCreateFromGLRenderbuffer(context, flags, renderbuffer, errcode_ret) - @ccall libopencl.clCreateFromGLRenderbuffer(context::cl_context, flags::cl_mem_flags, - renderbuffer::cl_GLuint, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateFromGLRenderbuffer( + context::cl_context, flags::cl_mem_flags, + renderbuffer::cl_GLuint, + errcode_ret::Ptr{cl_int} + )::cl_mem end @checked function clGetGLObjectInfo(memobj, gl_object_type, gl_object_name) - @ccall libopencl.clGetGLObjectInfo(memobj::cl_mem, - gl_object_type::Ptr{cl_gl_object_type}, - gl_object_name::Ptr{cl_GLuint})::cl_int -end - -@checked function clGetGLTextureInfo(memobj, param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetGLTextureInfo(memobj::cl_mem, param_name::cl_gl_texture_info, - param_value_size::Csize_t, param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int -end - -@checked function clEnqueueAcquireGLObjects(command_queue, num_objects, mem_objects, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueAcquireGLObjects(command_queue::cl_command_queue, - num_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueReleaseGLObjects(command_queue, num_objects, mem_objects, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueReleaseGLObjects(command_queue::cl_command_queue, - num_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int + @ccall libopencl.clGetGLObjectInfo( + memobj::cl_mem, + gl_object_type::Ptr{cl_gl_object_type}, + gl_object_name::Ptr{cl_GLuint} + )::cl_int +end + +@checked function clGetGLTextureInfo( + memobj, param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetGLTextureInfo( + memobj::cl_mem, param_name::cl_gl_texture_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int +end + +@checked function clEnqueueAcquireGLObjects( + command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueAcquireGLObjects( + command_queue::cl_command_queue, + num_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueReleaseGLObjects( + command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueReleaseGLObjects( + command_queue::cl_command_queue, + num_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end # typedef cl_mem CL_API_CALL clCreateFromGLTexture2D_t ( cl_context context , cl_mem_flags flags , cl_GLenum target , cl_GLint miplevel , cl_GLuint texture , cl_int * errcode_ret ) @@ -1223,17 +1549,21 @@ const clCreateFromGLTexture3D_t = Cvoid const clCreateFromGLTexture3D_fn = Ptr{clCreateFromGLTexture3D_t} function clCreateFromGLTexture2D(context, flags, target, miplevel, texture, errcode_ret) - @ccall libopencl.clCreateFromGLTexture2D(context::cl_context, flags::cl_mem_flags, - target::cl_GLenum, miplevel::cl_GLint, - texture::cl_GLuint, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateFromGLTexture2D( + context::cl_context, flags::cl_mem_flags, + target::cl_GLenum, miplevel::cl_GLint, + texture::cl_GLuint, + errcode_ret::Ptr{cl_int} + )::cl_mem end function clCreateFromGLTexture3D(context, flags, target, miplevel, texture, errcode_ret) - @ccall libopencl.clCreateFromGLTexture3D(context::cl_context, flags::cl_mem_flags, - target::cl_GLenum, miplevel::cl_GLint, - texture::cl_GLuint, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clCreateFromGLTexture3D( + context::cl_context, flags::cl_mem_flags, + target::cl_GLenum, miplevel::cl_GLint, + texture::cl_GLuint, + errcode_ret::Ptr{cl_int} + )::cl_mem end mutable struct __GLsync end @@ -1247,8 +1577,10 @@ const clCreateEventFromGLsyncKHR_t = Cvoid const clCreateEventFromGLsyncKHR_fn = Ptr{clCreateEventFromGLsyncKHR_t} function clCreateEventFromGLsyncKHR(context, sync, errcode_ret) - @ccall libopencl.clCreateEventFromGLsyncKHR(context::cl_context, sync::cl_GLsync, - errcode_ret::Ptr{cl_int})::cl_event + return @ccall libopencl.clCreateEventFromGLsyncKHR( + context::cl_context, sync::cl_GLsync, + errcode_ret::Ptr{cl_int} + )::cl_event end # typedef cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL_t ( cl_context context , cl_mem_flags flags , cl_mem_object_type image_type , cl_uint num_entries , cl_GLenum * gl_formats , cl_uint * num_texture_formats ) @@ -1257,15 +1589,19 @@ const clGetSupportedGLTextureFormatsINTEL_t = Cvoid # typedef clGetSupportedGLTextureFormatsINTEL_t * clGetSupportedGLTextureFormatsINTEL_fn const clGetSupportedGLTextureFormatsINTEL_fn = Ptr{clGetSupportedGLTextureFormatsINTEL_t} -@checked function clGetSupportedGLTextureFormatsINTEL(context, flags, image_type, - num_entries, gl_formats, - num_texture_formats) - @ccall libopencl.clGetSupportedGLTextureFormatsINTEL(context::cl_context, - flags::cl_mem_flags, - image_type::cl_mem_object_type, - num_entries::cl_uint, - gl_formats::Ptr{cl_GLenum}, - num_texture_formats::Ptr{cl_uint})::cl_int +@checked function clGetSupportedGLTextureFormatsINTEL( + context, flags, image_type, + num_entries, gl_formats, + num_texture_formats + ) + @ccall libopencl.clGetSupportedGLTextureFormatsINTEL( + context::cl_context, + flags::cl_mem_flags, + image_type::cl_mem_object_type, + num_entries::cl_uint, + gl_formats::Ptr{cl_GLenum}, + num_texture_formats::Ptr{cl_uint} + )::cl_int end const cl_device_partition_property_ext = cl_ulong @@ -1383,10 +1719,12 @@ const clGetCommandBufferInfoKHR_t = Cvoid const clGetCommandBufferInfoKHR_fn = Ptr{clGetCommandBufferInfoKHR_t} function clCreateCommandBufferKHR(num_queues, queues, properties, errcode_ret) - @ccall libopencl.clCreateCommandBufferKHR(num_queues::cl_uint, - queues::Ptr{cl_command_queue}, - properties::Ptr{cl_command_buffer_properties_khr}, - errcode_ret::Ptr{cl_int})::cl_command_buffer_khr + return @ccall libopencl.clCreateCommandBufferKHR( + num_queues::cl_uint, + queues::Ptr{cl_command_queue}, + properties::Ptr{cl_command_buffer_properties_khr}, + errcode_ret::Ptr{cl_int} + )::cl_command_buffer_khr end @checked function clFinalizeCommandBufferKHR(command_buffer) @@ -1401,167 +1739,211 @@ end @ccall libopencl.clReleaseCommandBufferKHR(command_buffer::cl_command_buffer_khr)::cl_int end -@checked function clEnqueueCommandBufferKHR(num_queues, queues, command_buffer, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueCommandBufferKHR(num_queues::cl_uint, - queues::Ptr{cl_command_queue}, - command_buffer::cl_command_buffer_khr, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clCommandBarrierWithWaitListKHR(command_buffer, command_queue, - num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, - mutable_handle) - @ccall libopencl.clCommandBarrierWithWaitListKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandCopyBufferKHR(command_buffer, command_queue, src_buffer, - dst_buffer, src_offset, dst_offset, size, - num_sync_points_in_wait_list, sync_point_wait_list, - sync_point, mutable_handle) - @ccall libopencl.clCommandCopyBufferKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - src_buffer::cl_mem, dst_buffer::cl_mem, - src_offset::Csize_t, dst_offset::Csize_t, - size::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandCopyBufferRectKHR(command_buffer, command_queue, src_buffer, - dst_buffer, src_origin, dst_origin, region, - src_row_pitch, src_slice_pitch, dst_row_pitch, - dst_slice_pitch, num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, - mutable_handle) - @ccall libopencl.clCommandCopyBufferRectKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - src_buffer::cl_mem, dst_buffer::cl_mem, - src_origin::Ptr{Csize_t}, - dst_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - src_row_pitch::Csize_t, - src_slice_pitch::Csize_t, - dst_row_pitch::Csize_t, - dst_slice_pitch::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandCopyBufferToImageKHR(command_buffer, command_queue, src_buffer, - dst_image, src_offset, dst_origin, region, - num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, - mutable_handle) - @ccall libopencl.clCommandCopyBufferToImageKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - src_buffer::cl_mem, dst_image::cl_mem, - src_offset::Csize_t, - dst_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandCopyImageKHR(command_buffer, command_queue, src_image, dst_image, - src_origin, dst_origin, region, - num_sync_points_in_wait_list, sync_point_wait_list, - sync_point, mutable_handle) - @ccall libopencl.clCommandCopyImageKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - src_image::cl_mem, dst_image::cl_mem, - src_origin::Ptr{Csize_t}, - dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandCopyImageToBufferKHR(command_buffer, command_queue, src_image, - dst_buffer, src_origin, region, dst_offset, - num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, - mutable_handle) - @ccall libopencl.clCommandCopyImageToBufferKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - src_image::cl_mem, dst_buffer::cl_mem, - src_origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - dst_offset::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandFillBufferKHR(command_buffer, command_queue, buffer, pattern, - pattern_size, offset, size, - num_sync_points_in_wait_list, sync_point_wait_list, - sync_point, mutable_handle) - @ccall libopencl.clCommandFillBufferKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, buffer::cl_mem, - pattern::Ptr{Cvoid}, pattern_size::Csize_t, - offset::Csize_t, size::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandFillImageKHR(command_buffer, command_queue, image, fill_color, - origin, region, num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, mutable_handle) - @ccall libopencl.clCommandFillImageKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, image::cl_mem, - fill_color::Ptr{Cvoid}, origin::Ptr{Csize_t}, - region::Ptr{Csize_t}, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandNDRangeKernelKHR(command_buffer, command_queue, properties, - kernel, work_dim, global_work_offset, - global_work_size, local_work_size, - num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, - mutable_handle) - @ccall libopencl.clCommandNDRangeKernelKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - properties::Ptr{cl_ndrange_kernel_command_properties_khr}, - kernel::cl_kernel, work_dim::cl_uint, - global_work_offset::Ptr{Csize_t}, - global_work_size::Ptr{Csize_t}, - local_work_size::Ptr{Csize_t}, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clGetCommandBufferInfoKHR(command_buffer, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetCommandBufferInfoKHR(command_buffer::cl_command_buffer_khr, - param_name::cl_command_buffer_info_khr, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clEnqueueCommandBufferKHR( + num_queues, queues, command_buffer, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueCommandBufferKHR( + num_queues::cl_uint, + queues::Ptr{cl_command_queue}, + command_buffer::cl_command_buffer_khr, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clCommandBarrierWithWaitListKHR( + command_buffer, command_queue, + num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, + mutable_handle + ) + @ccall libopencl.clCommandBarrierWithWaitListKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandCopyBufferKHR( + command_buffer, command_queue, src_buffer, + dst_buffer, src_offset, dst_offset, size, + num_sync_points_in_wait_list, sync_point_wait_list, + sync_point, mutable_handle + ) + @ccall libopencl.clCommandCopyBufferKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_buffer::cl_mem, + src_offset::Csize_t, dst_offset::Csize_t, + size::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandCopyBufferRectKHR( + command_buffer, command_queue, src_buffer, + dst_buffer, src_origin, dst_origin, region, + src_row_pitch, src_slice_pitch, dst_row_pitch, + dst_slice_pitch, num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, + mutable_handle + ) + @ccall libopencl.clCommandCopyBufferRectKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_buffer::cl_mem, + src_origin::Ptr{Csize_t}, + dst_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + src_row_pitch::Csize_t, + src_slice_pitch::Csize_t, + dst_row_pitch::Csize_t, + dst_slice_pitch::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandCopyBufferToImageKHR( + command_buffer, command_queue, src_buffer, + dst_image, src_offset, dst_origin, region, + num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, + mutable_handle + ) + @ccall libopencl.clCommandCopyBufferToImageKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + src_buffer::cl_mem, dst_image::cl_mem, + src_offset::Csize_t, + dst_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandCopyImageKHR( + command_buffer, command_queue, src_image, dst_image, + src_origin, dst_origin, region, + num_sync_points_in_wait_list, sync_point_wait_list, + sync_point, mutable_handle + ) + @ccall libopencl.clCommandCopyImageKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + src_image::cl_mem, dst_image::cl_mem, + src_origin::Ptr{Csize_t}, + dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t}, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandCopyImageToBufferKHR( + command_buffer, command_queue, src_image, + dst_buffer, src_origin, region, dst_offset, + num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, + mutable_handle + ) + @ccall libopencl.clCommandCopyImageToBufferKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + src_image::cl_mem, dst_buffer::cl_mem, + src_origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + dst_offset::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandFillBufferKHR( + command_buffer, command_queue, buffer, pattern, + pattern_size, offset, size, + num_sync_points_in_wait_list, sync_point_wait_list, + sync_point, mutable_handle + ) + @ccall libopencl.clCommandFillBufferKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, buffer::cl_mem, + pattern::Ptr{Cvoid}, pattern_size::Csize_t, + offset::Csize_t, size::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandFillImageKHR( + command_buffer, command_queue, image, fill_color, + origin, region, num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, mutable_handle + ) + @ccall libopencl.clCommandFillImageKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, image::cl_mem, + fill_color::Ptr{Cvoid}, origin::Ptr{Csize_t}, + region::Ptr{Csize_t}, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandNDRangeKernelKHR( + command_buffer, command_queue, properties, + kernel, work_dim, global_work_offset, + global_work_size, local_work_size, + num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, + mutable_handle + ) + @ccall libopencl.clCommandNDRangeKernelKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + properties::Ptr{cl_ndrange_kernel_command_properties_khr}, + kernel::cl_kernel, work_dim::cl_uint, + global_work_offset::Ptr{Csize_t}, + global_work_size::Ptr{Csize_t}, + local_work_size::Ptr{Csize_t}, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clGetCommandBufferInfoKHR( + command_buffer, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetCommandBufferInfoKHR( + command_buffer::cl_command_buffer_khr, + param_name::cl_command_buffer_info_khr, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end # typedef cl_int CL_API_CALL clCommandSVMMemcpyKHR_t ( cl_command_buffer_khr command_buffer , cl_command_queue command_queue , void * dst_ptr , const void * src_ptr , size_t size , cl_uint num_sync_points_in_wait_list , const cl_sync_point_khr * sync_point_wait_list , cl_sync_point_khr * sync_point , cl_mutable_command_khr * mutable_handle ) @@ -1576,30 +1958,38 @@ const clCommandSVMMemFillKHR_t = Cvoid # typedef clCommandSVMMemFillKHR_t * clCommandSVMMemFillKHR_fn const clCommandSVMMemFillKHR_fn = Ptr{clCommandSVMMemFillKHR_t} -@checked function clCommandSVMMemcpyKHR(command_buffer, command_queue, dst_ptr, src_ptr, - size, num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, mutable_handle) - @ccall libopencl.clCommandSVMMemcpyKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - dst_ptr::Ptr{Cvoid}, src_ptr::Ptr{Cvoid}, - size::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int -end - -@checked function clCommandSVMMemFillKHR(command_buffer, command_queue, svm_ptr, pattern, - pattern_size, size, num_sync_points_in_wait_list, - sync_point_wait_list, sync_point, mutable_handle) - @ccall libopencl.clCommandSVMMemFillKHR(command_buffer::cl_command_buffer_khr, - command_queue::cl_command_queue, - svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, - pattern_size::Csize_t, size::Csize_t, - num_sync_points_in_wait_list::cl_uint, - sync_point_wait_list::Ptr{cl_sync_point_khr}, - sync_point::Ptr{cl_sync_point_khr}, - mutable_handle::Ptr{cl_mutable_command_khr})::cl_int +@checked function clCommandSVMMemcpyKHR( + command_buffer, command_queue, dst_ptr, src_ptr, + size, num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, mutable_handle + ) + @ccall libopencl.clCommandSVMMemcpyKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + dst_ptr::Ptr{Cvoid}, src_ptr::Ptr{Cvoid}, + size::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int +end + +@checked function clCommandSVMMemFillKHR( + command_buffer, command_queue, svm_ptr, pattern, + pattern_size, size, num_sync_points_in_wait_list, + sync_point_wait_list, sync_point, mutable_handle + ) + @ccall libopencl.clCommandSVMMemFillKHR( + command_buffer::cl_command_buffer_khr, + command_queue::cl_command_queue, + svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, + pattern_size::Csize_t, size::Csize_t, + num_sync_points_in_wait_list::cl_uint, + sync_point_wait_list::Ptr{cl_sync_point_khr}, + sync_point::Ptr{cl_sync_point_khr}, + mutable_handle::Ptr{cl_mutable_command_khr} + )::cl_int end const cl_platform_command_buffer_capabilities_khr = cl_bitfield @@ -1610,15 +2000,19 @@ const clRemapCommandBufferKHR_t = Cvoid # typedef clRemapCommandBufferKHR_t * clRemapCommandBufferKHR_fn const clRemapCommandBufferKHR_fn = Ptr{clRemapCommandBufferKHR_t} -function clRemapCommandBufferKHR(command_buffer, automatic, num_queues, queues, num_handles, - handles, handles_ret, errcode_ret) - @ccall libopencl.clRemapCommandBufferKHR(command_buffer::cl_command_buffer_khr, - automatic::cl_bool, num_queues::cl_uint, - queues::Ptr{cl_command_queue}, - num_handles::cl_uint, - handles::Ptr{cl_mutable_command_khr}, - handles_ret::Ptr{cl_mutable_command_khr}, - errcode_ret::Ptr{cl_int})::cl_command_buffer_khr +function clRemapCommandBufferKHR( + command_buffer, automatic, num_queues, queues, num_handles, + handles, handles_ret, errcode_ret + ) + return @ccall libopencl.clRemapCommandBufferKHR( + command_buffer::cl_command_buffer_khr, + automatic::cl_bool, num_queues::cl_uint, + queues::Ptr{cl_command_queue}, + num_handles::cl_uint, + handles::Ptr{cl_mutable_command_khr}, + handles_ret::Ptr{cl_mutable_command_khr}, + errcode_ret::Ptr{cl_int} + )::cl_command_buffer_khr end const cl_command_buffer_structure_type_khr = cl_uint @@ -1685,17 +2079,23 @@ const clGetMutableCommandInfoKHR_t = Cvoid const clGetMutableCommandInfoKHR_fn = Ptr{clGetMutableCommandInfoKHR_t} @checked function clUpdateMutableCommandsKHR(command_buffer, mutable_config) - @ccall libopencl.clUpdateMutableCommandsKHR(command_buffer::cl_command_buffer_khr, - mutable_config::Ptr{cl_mutable_base_config_khr})::cl_int + @ccall libopencl.clUpdateMutableCommandsKHR( + command_buffer::cl_command_buffer_khr, + mutable_config::Ptr{cl_mutable_base_config_khr} + )::cl_int end -@checked function clGetMutableCommandInfoKHR(command, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetMutableCommandInfoKHR(command::cl_mutable_command_khr, - param_name::cl_mutable_command_info_khr, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetMutableCommandInfoKHR( + command, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetMutableCommandInfoKHR( + command::cl_mutable_command_khr, + param_name::cl_mutable_command_info_khr, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end # typedef cl_int CL_API_CALL clSetMemObjectDestructorAPPLE_t ( cl_mem memobj , void ( CL_CALLBACK * pfn_notify ) ( cl_mem memobj , void * user_data ) , void * user_data ) @@ -1705,8 +2105,10 @@ const clSetMemObjectDestructorAPPLE_t = Cvoid const clSetMemObjectDestructorAPPLE_fn = Ptr{clSetMemObjectDestructorAPPLE_t} @checked function clSetMemObjectDestructorAPPLE(memobj, pfn_notify, user_data) - @ccall libopencl.clSetMemObjectDestructorAPPLE(memobj::cl_mem, pfn_notify::Ptr{Cvoid}, - user_data::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetMemObjectDestructorAPPLE( + memobj::cl_mem, pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid} + )::cl_int end # typedef void CL_API_CALL clLogMessagesToSystemLogAPPLE_t ( const char * errstr , const void * private_info , size_t cb , void * user_data ) @@ -1728,21 +2130,27 @@ const clLogMessagesToStderrAPPLE_t = Cvoid const clLogMessagesToStderrAPPLE_fn = Ptr{clLogMessagesToStderrAPPLE_t} function clLogMessagesToSystemLogAPPLE(errstr, private_info, cb, user_data) - @ccall libopencl.clLogMessagesToSystemLogAPPLE(errstr::Ptr{Cchar}, - private_info::Ptr{Cvoid}, cb::Csize_t, - user_data::Ptr{Cvoid})::Cvoid + return @ccall libopencl.clLogMessagesToSystemLogAPPLE( + errstr::Ptr{Cchar}, + private_info::Ptr{Cvoid}, cb::Csize_t, + user_data::Ptr{Cvoid} + )::Cvoid end function clLogMessagesToStdoutAPPLE(errstr, private_info, cb, user_data) - @ccall libopencl.clLogMessagesToStdoutAPPLE(errstr::Ptr{Cchar}, - private_info::Ptr{Cvoid}, cb::Csize_t, - user_data::Ptr{Cvoid})::Cvoid + return @ccall libopencl.clLogMessagesToStdoutAPPLE( + errstr::Ptr{Cchar}, + private_info::Ptr{Cvoid}, cb::Csize_t, + user_data::Ptr{Cvoid} + )::Cvoid end function clLogMessagesToStderrAPPLE(errstr, private_info, cb, user_data) - @ccall libopencl.clLogMessagesToStderrAPPLE(errstr::Ptr{Cchar}, - private_info::Ptr{Cvoid}, cb::Csize_t, - user_data::Ptr{Cvoid})::Cvoid + return @ccall libopencl.clLogMessagesToStderrAPPLE( + errstr::Ptr{Cchar}, + private_info::Ptr{Cvoid}, cb::Csize_t, + user_data::Ptr{Cvoid} + )::Cvoid end # typedef cl_int CL_API_CALL clIcdGetPlatformIDsKHR_t ( cl_uint num_entries , cl_platform_id * platforms , cl_uint * num_platforms ) @@ -1752,9 +2160,11 @@ const clIcdGetPlatformIDsKHR_t = Cvoid const clIcdGetPlatformIDsKHR_fn = Ptr{clIcdGetPlatformIDsKHR_t} @checked function clIcdGetPlatformIDsKHR(num_entries, platforms, num_platforms) - @ccall libopencl.clIcdGetPlatformIDsKHR(num_entries::cl_uint, - platforms::Ptr{cl_platform_id}, - num_platforms::Ptr{cl_uint})::cl_int + @ccall libopencl.clIcdGetPlatformIDsKHR( + num_entries::cl_uint, + platforms::Ptr{cl_platform_id}, + num_platforms::Ptr{cl_uint} + )::cl_int end # typedef cl_program CL_API_CALL clCreateProgramWithILKHR_t ( cl_context context , const void * il , size_t length , cl_int * errcode_ret ) @@ -1764,9 +2174,11 @@ const clCreateProgramWithILKHR_t = Cvoid const clCreateProgramWithILKHR_fn = Ptr{clCreateProgramWithILKHR_t} function clCreateProgramWithILKHR(context, il, length, errcode_ret) - @ccall libopencl.clCreateProgramWithILKHR(context::cl_context, il::Ptr{Cvoid}, - length::Csize_t, - errcode_ret::Ptr{cl_int})::cl_program + return @ccall libopencl.clCreateProgramWithILKHR( + context::cl_context, il::Ptr{Cvoid}, + length::Csize_t, + errcode_ret::Ptr{cl_int} + )::cl_program end const cl_context_memory_initialize_khr = cl_bitfield @@ -1792,10 +2204,12 @@ const clCreateCommandQueueWithPropertiesKHR_t = Cvoid const clCreateCommandQueueWithPropertiesKHR_fn = Ptr{clCreateCommandQueueWithPropertiesKHR_t} function clCreateCommandQueueWithPropertiesKHR(context, device, properties, errcode_ret) - @ccall libopencl.clCreateCommandQueueWithPropertiesKHR(context::cl_context, - device::cl_device_id, - properties::Ptr{cl_queue_properties_khr}, - errcode_ret::Ptr{cl_int})::cl_command_queue + return @ccall libopencl.clCreateCommandQueueWithPropertiesKHR( + context::cl_context, + device::cl_device_id, + properties::Ptr{cl_queue_properties_khr}, + errcode_ret::Ptr{cl_int} + )::cl_command_queue end # typedef cl_int CL_API_CALL clReleaseDeviceEXT_t ( cl_device_id device ) @@ -1824,13 +2238,17 @@ end @ccall libopencl.clRetainDeviceEXT(device::cl_device_id)::cl_int end -@checked function clCreateSubDevicesEXT(in_device, properties, num_entries, out_devices, - num_devices) - @ccall libopencl.clCreateSubDevicesEXT(in_device::cl_device_id, - properties::Ptr{cl_device_partition_property_ext}, - num_entries::cl_uint, - out_devices::Ptr{cl_device_id}, - num_devices::Ptr{cl_uint})::cl_int +@checked function clCreateSubDevicesEXT( + in_device, properties, num_entries, out_devices, + num_devices + ) + @ccall libopencl.clCreateSubDevicesEXT( + in_device::cl_device_id, + properties::Ptr{cl_device_partition_property_ext}, + num_entries::cl_uint, + out_devices::Ptr{cl_device_id}, + num_devices::Ptr{cl_uint} + )::cl_int end const cl_mem_migration_flags_ext = cl_bitfield @@ -1841,16 +2259,20 @@ const clEnqueueMigrateMemObjectEXT_t = Cvoid # typedef clEnqueueMigrateMemObjectEXT_t * clEnqueueMigrateMemObjectEXT_fn const clEnqueueMigrateMemObjectEXT_fn = Ptr{clEnqueueMigrateMemObjectEXT_t} -@checked function clEnqueueMigrateMemObjectEXT(command_queue, num_mem_objects, mem_objects, - flags, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueMigrateMemObjectEXT(command_queue::cl_command_queue, - num_mem_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - flags::cl_mem_migration_flags_ext, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueMigrateMemObjectEXT( + command_queue, num_mem_objects, mem_objects, + flags, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueMigrateMemObjectEXT( + command_queue::cl_command_queue, + num_mem_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + flags::cl_mem_migration_flags_ext, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end const cl_image_pitch_info_qcom = cl_uint @@ -1868,16 +2290,20 @@ const clGetDeviceImageInfoQCOM_t = Cvoid # typedef clGetDeviceImageInfoQCOM_t * clGetDeviceImageInfoQCOM_fn const clGetDeviceImageInfoQCOM_fn = Ptr{clGetDeviceImageInfoQCOM_t} -@checked function clGetDeviceImageInfoQCOM(device, image_width, image_height, image_format, - param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetDeviceImageInfoQCOM(device::cl_device_id, image_width::Csize_t, - image_height::Csize_t, - image_format::Ptr{cl_image_format}, - param_name::cl_image_pitch_info_qcom, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetDeviceImageInfoQCOM( + device, image_width, image_height, image_format, + param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetDeviceImageInfoQCOM( + device::cl_device_id, image_width::Csize_t, + image_height::Csize_t, + image_format::Ptr{cl_image_format}, + param_name::cl_image_pitch_info_qcom, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end struct _cl_mem_ion_host_ptr @@ -1907,26 +2333,34 @@ const clEnqueueReleaseGrallocObjectsIMG_t = Cvoid # typedef clEnqueueReleaseGrallocObjectsIMG_t * clEnqueueReleaseGrallocObjectsIMG_fn const clEnqueueReleaseGrallocObjectsIMG_fn = Ptr{clEnqueueReleaseGrallocObjectsIMG_t} -@checked function clEnqueueAcquireGrallocObjectsIMG(command_queue, num_objects, mem_objects, - num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueAcquireGrallocObjectsIMG(command_queue::cl_command_queue, - num_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueReleaseGrallocObjectsIMG(command_queue, num_objects, mem_objects, - num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueReleaseGrallocObjectsIMG(command_queue::cl_command_queue, - num_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueAcquireGrallocObjectsIMG( + command_queue, num_objects, mem_objects, + num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueAcquireGrallocObjectsIMG( + command_queue::cl_command_queue, + num_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueReleaseGrallocObjectsIMG( + command_queue, num_objects, mem_objects, + num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueReleaseGrallocObjectsIMG( + command_queue::cl_command_queue, + num_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end const cl_mipmap_filter_mode_img = cl_uint @@ -1937,18 +2371,22 @@ const clEnqueueGenerateMipmapIMG_t = Cvoid # typedef clEnqueueGenerateMipmapIMG_t * clEnqueueGenerateMipmapIMG_fn const clEnqueueGenerateMipmapIMG_fn = Ptr{clEnqueueGenerateMipmapIMG_t} -@checked function clEnqueueGenerateMipmapIMG(command_queue, src_image, dst_image, - mipmap_filter_mode, array_region, mip_region, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueGenerateMipmapIMG(command_queue::cl_command_queue, - src_image::cl_mem, dst_image::cl_mem, - mipmap_filter_mode::cl_mipmap_filter_mode_img, - array_region::Ptr{Csize_t}, - mip_region::Ptr{Csize_t}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueGenerateMipmapIMG( + command_queue, src_image, dst_image, + mipmap_filter_mode, array_region, mip_region, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueGenerateMipmapIMG( + command_queue::cl_command_queue, + src_image::cl_mem, dst_image::cl_mem, + mipmap_filter_mode::cl_mipmap_filter_mode_img, + array_region::Ptr{Csize_t}, + mip_region::Ptr{Csize_t}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end # typedef cl_int CL_API_CALL clGetKernelSubGroupInfoKHR_t ( cl_kernel in_kernel , cl_device_id in_device , cl_kernel_sub_group_info param_name , size_t input_value_size , const void * input_value , size_t param_value_size , void * param_value , size_t * param_value_size_ret ) @@ -1957,18 +2395,22 @@ const clGetKernelSubGroupInfoKHR_t = Cvoid # typedef clGetKernelSubGroupInfoKHR_t * clGetKernelSubGroupInfoKHR_fn const clGetKernelSubGroupInfoKHR_fn = Ptr{clGetKernelSubGroupInfoKHR_t} -@checked function clGetKernelSubGroupInfoKHR(in_kernel, in_device, param_name, - input_value_size, input_value, - param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetKernelSubGroupInfoKHR(in_kernel::cl_kernel, - in_device::cl_device_id, - param_name::cl_kernel_sub_group_info, - input_value_size::Csize_t, - input_value::Ptr{Cvoid}, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetKernelSubGroupInfoKHR( + in_kernel, in_device, param_name, + input_value_size, input_value, + param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetKernelSubGroupInfoKHR( + in_kernel::cl_kernel, + in_device::cl_device_id, + param_name::cl_kernel_sub_group_info, + input_value_size::Csize_t, + input_value::Ptr{Cvoid}, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end const cl_queue_priority_khr = cl_uint @@ -1979,7 +2421,7 @@ const cl_version_khr = cl_uint struct _cl_name_version_khr version::cl_version_khr - name::NTuple{64,Cchar} + name::NTuple{64, Cchar} end const cl_name_version_khr = _cl_name_version_khr @@ -1999,15 +2441,19 @@ const clGetKernelSuggestedLocalWorkSizeKHR_t = Cvoid # typedef clGetKernelSuggestedLocalWorkSizeKHR_t * clGetKernelSuggestedLocalWorkSizeKHR_fn const clGetKernelSuggestedLocalWorkSizeKHR_fn = Ptr{clGetKernelSuggestedLocalWorkSizeKHR_t} -@checked function clGetKernelSuggestedLocalWorkSizeKHR(command_queue, kernel, work_dim, - global_work_offset, global_work_size, - suggested_local_work_size) - @ccall libopencl.clGetKernelSuggestedLocalWorkSizeKHR(command_queue::cl_command_queue, - kernel::cl_kernel, - work_dim::cl_uint, - global_work_offset::Ptr{Csize_t}, - global_work_size::Ptr{Csize_t}, - suggested_local_work_size::Ptr{Csize_t})::cl_int +@checked function clGetKernelSuggestedLocalWorkSizeKHR( + command_queue, kernel, work_dim, + global_work_offset, global_work_size, + suggested_local_work_size + ) + @ccall libopencl.clGetKernelSuggestedLocalWorkSizeKHR( + command_queue::cl_command_queue, + kernel::cl_kernel, + work_dim::cl_uint, + global_work_offset::Ptr{Csize_t}, + global_work_size::Ptr{Csize_t}, + suggested_local_work_size::Ptr{Csize_t} + )::cl_int end const cl_device_integer_dot_product_capabilities_khr = cl_bitfield @@ -2037,28 +2483,36 @@ const clEnqueueReleaseExternalMemObjectsKHR_t = Cvoid # typedef clEnqueueReleaseExternalMemObjectsKHR_t * clEnqueueReleaseExternalMemObjectsKHR_fn const clEnqueueReleaseExternalMemObjectsKHR_fn = Ptr{clEnqueueReleaseExternalMemObjectsKHR_t} -@checked function clEnqueueAcquireExternalMemObjectsKHR(command_queue, num_mem_objects, - mem_objects, - num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueAcquireExternalMemObjectsKHR(command_queue::cl_command_queue, - num_mem_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueReleaseExternalMemObjectsKHR(command_queue, num_mem_objects, - mem_objects, - num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueReleaseExternalMemObjectsKHR(command_queue::cl_command_queue, - num_mem_objects::cl_uint, - mem_objects::Ptr{cl_mem}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueAcquireExternalMemObjectsKHR( + command_queue, num_mem_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueAcquireExternalMemObjectsKHR( + command_queue::cl_command_queue, + num_mem_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueReleaseExternalMemObjectsKHR( + command_queue, num_mem_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueReleaseExternalMemObjectsKHR( + command_queue::cl_command_queue, + num_mem_objects::cl_uint, + mem_objects::Ptr{cl_mem}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end mutable struct _cl_semaphore_khr end @@ -2073,14 +2527,18 @@ const clGetSemaphoreHandleForTypeKHR_t = Cvoid # typedef clGetSemaphoreHandleForTypeKHR_t * clGetSemaphoreHandleForTypeKHR_fn const clGetSemaphoreHandleForTypeKHR_fn = Ptr{clGetSemaphoreHandleForTypeKHR_t} -@checked function clGetSemaphoreHandleForTypeKHR(sema_object, device, handle_type, - handle_size, handle_ptr, handle_size_ret) - @ccall libopencl.clGetSemaphoreHandleForTypeKHR(sema_object::cl_semaphore_khr, - device::cl_device_id, - handle_type::cl_external_semaphore_handle_type_khr, - handle_size::Csize_t, - handle_ptr::Ptr{Cvoid}, - handle_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetSemaphoreHandleForTypeKHR( + sema_object, device, handle_type, + handle_size, handle_ptr, handle_size_ret + ) + @ccall libopencl.clGetSemaphoreHandleForTypeKHR( + sema_object::cl_semaphore_khr, + device::cl_device_id, + handle_type::cl_external_semaphore_handle_type_khr, + handle_size::Csize_t, + handle_ptr::Ptr{Cvoid}, + handle_size_ret::Ptr{Csize_t} + )::cl_int end const cl_semaphore_reimport_properties_khr = cl_properties @@ -2092,9 +2550,11 @@ const clReImportSemaphoreSyncFdKHR_t = Cvoid const clReImportSemaphoreSyncFdKHR_fn = Ptr{clReImportSemaphoreSyncFdKHR_t} @checked function clReImportSemaphoreSyncFdKHR(sema_object, reimport_props, fd) - @ccall libopencl.clReImportSemaphoreSyncFdKHR(sema_object::cl_semaphore_khr, - reimport_props::Ptr{cl_semaphore_reimport_properties_khr}, - fd::Cint)::cl_int + @ccall libopencl.clReImportSemaphoreSyncFdKHR( + sema_object::cl_semaphore_khr, + reimport_props::Ptr{cl_semaphore_reimport_properties_khr}, + fd::Cint + )::cl_int end const cl_semaphore_properties_khr = cl_properties @@ -2142,43 +2602,57 @@ const clRetainSemaphoreKHR_t = Cvoid const clRetainSemaphoreKHR_fn = Ptr{clRetainSemaphoreKHR_t} function clCreateSemaphoreWithPropertiesKHR(context, sema_props, errcode_ret) - @ccall libopencl.clCreateSemaphoreWithPropertiesKHR(context::cl_context, - sema_props::Ptr{cl_semaphore_properties_khr}, - errcode_ret::Ptr{cl_int})::cl_semaphore_khr -end - -@checked function clEnqueueWaitSemaphoresKHR(command_queue, num_sema_objects, sema_objects, - sema_payload_list, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueWaitSemaphoresKHR(command_queue::cl_command_queue, - num_sema_objects::cl_uint, - sema_objects::Ptr{cl_semaphore_khr}, - sema_payload_list::Ptr{cl_semaphore_payload_khr}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSignalSemaphoresKHR(command_queue, num_sema_objects, - sema_objects, sema_payload_list, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueSignalSemaphoresKHR(command_queue::cl_command_queue, - num_sema_objects::cl_uint, - sema_objects::Ptr{cl_semaphore_khr}, - sema_payload_list::Ptr{cl_semaphore_payload_khr}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clGetSemaphoreInfoKHR(sema_object, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetSemaphoreInfoKHR(sema_object::cl_semaphore_khr, - param_name::cl_semaphore_info_khr, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int + return @ccall libopencl.clCreateSemaphoreWithPropertiesKHR( + context::cl_context, + sema_props::Ptr{cl_semaphore_properties_khr}, + errcode_ret::Ptr{cl_int} + )::cl_semaphore_khr +end + +@checked function clEnqueueWaitSemaphoresKHR( + command_queue, num_sema_objects, sema_objects, + sema_payload_list, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueWaitSemaphoresKHR( + command_queue::cl_command_queue, + num_sema_objects::cl_uint, + sema_objects::Ptr{cl_semaphore_khr}, + sema_payload_list::Ptr{cl_semaphore_payload_khr}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSignalSemaphoresKHR( + command_queue, num_sema_objects, + sema_objects, sema_payload_list, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueSignalSemaphoresKHR( + command_queue::cl_command_queue, + num_sema_objects::cl_uint, + sema_objects::Ptr{cl_semaphore_khr}, + sema_payload_list::Ptr{cl_semaphore_payload_khr}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clGetSemaphoreInfoKHR( + sema_object, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetSemaphoreInfoKHR( + sema_object::cl_semaphore_khr, + param_name::cl_semaphore_info_khr, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clReleaseSemaphoreKHR(sema_object) @@ -2198,10 +2672,12 @@ const clImportMemoryARM_t = Cvoid const clImportMemoryARM_fn = Ptr{clImportMemoryARM_t} function clImportMemoryARM(context, flags, properties, memory, size, errcode_ret) - @ccall libopencl.clImportMemoryARM(context::cl_context, flags::cl_mem_flags, - properties::Ptr{cl_import_properties_arm}, - memory::Ptr{Cvoid}, size::Csize_t, - errcode_ret::Ptr{cl_int})::cl_mem + return @ccall libopencl.clImportMemoryARM( + context::cl_context, flags::cl_mem_flags, + properties::Ptr{cl_import_properties_arm}, + memory::Ptr{Cvoid}, size::Csize_t, + errcode_ret::Ptr{cl_int} + )::cl_mem end const cl_svm_mem_flags_arm = cl_bitfield @@ -2265,77 +2741,103 @@ const clSetKernelExecInfoARM_t = Cvoid const clSetKernelExecInfoARM_fn = Ptr{clSetKernelExecInfoARM_t} function clSVMAllocARM(context, flags, size, alignment) - @ccall libopencl.clSVMAllocARM(context::cl_context, flags::cl_svm_mem_flags_arm, - size::Csize_t, alignment::cl_uint)::Ptr{Cvoid} + return @ccall libopencl.clSVMAllocARM( + context::cl_context, flags::cl_svm_mem_flags_arm, + size::Csize_t, alignment::cl_uint + )::Ptr{Cvoid} end function clSVMFreeARM(context, svm_pointer) - @ccall libopencl.clSVMFreeARM(context::cl_context, svm_pointer::Ptr{Cvoid})::Cvoid -end - -@checked function clEnqueueSVMFreeARM(command_queue, num_svm_pointers, svm_pointers, - pfn_free_func, user_data, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueSVMFreeARM(command_queue::cl_command_queue, - num_svm_pointers::cl_uint, - svm_pointers::Ptr{Ptr{Cvoid}}, - pfn_free_func::Ptr{Cvoid}, user_data::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMemcpyARM(command_queue, blocking_copy, dst_ptr, src_ptr, - size, num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueSVMMemcpyARM(command_queue::cl_command_queue, - blocking_copy::cl_bool, dst_ptr::Ptr{Cvoid}, - src_ptr::Ptr{Cvoid}, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMemFillARM(command_queue, svm_ptr, pattern, pattern_size, - size, num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueSVMMemFillARM(command_queue::cl_command_queue, - svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, - pattern_size::Csize_t, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMMapARM(command_queue, blocking_map, flags, svm_ptr, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueSVMMapARM(command_queue::cl_command_queue, - blocking_map::cl_bool, flags::cl_map_flags, - svm_ptr::Ptr{Cvoid}, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueSVMUnmapARM(command_queue, svm_ptr, num_events_in_wait_list, - event_wait_list, event) - @ccall libopencl.clEnqueueSVMUnmapARM(command_queue::cl_command_queue, - svm_ptr::Ptr{Cvoid}, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int + return @ccall libopencl.clSVMFreeARM(context::cl_context, svm_pointer::Ptr{Cvoid})::Cvoid +end + +@checked function clEnqueueSVMFreeARM( + command_queue, num_svm_pointers, svm_pointers, + pfn_free_func, user_data, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMFreeARM( + command_queue::cl_command_queue, + num_svm_pointers::cl_uint, + svm_pointers::Ptr{Ptr{Cvoid}}, + pfn_free_func::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMemcpyARM( + command_queue, blocking_copy, dst_ptr, src_ptr, + size, num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueSVMMemcpyARM( + command_queue::cl_command_queue, + blocking_copy::cl_bool, dst_ptr::Ptr{Cvoid}, + src_ptr::Ptr{Cvoid}, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMemFillARM( + command_queue, svm_ptr, pattern, pattern_size, + size, num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueSVMMemFillARM( + command_queue::cl_command_queue, + svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, + pattern_size::Csize_t, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMMapARM( + command_queue, blocking_map, flags, svm_ptr, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMMapARM( + command_queue::cl_command_queue, + blocking_map::cl_bool, flags::cl_map_flags, + svm_ptr::Ptr{Cvoid}, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueSVMUnmapARM( + command_queue, svm_ptr, num_events_in_wait_list, + event_wait_list, event + ) + @ccall libopencl.clEnqueueSVMUnmapARM( + command_queue::cl_command_queue, + svm_ptr::Ptr{Cvoid}, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end @checked function clSetKernelArgSVMPointerARM(kernel, arg_index, arg_value) - @ccall libopencl.clSetKernelArgSVMPointerARM(kernel::cl_kernel, arg_index::cl_uint, - arg_value::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetKernelArgSVMPointerARM( + kernel::cl_kernel, arg_index::cl_uint, + arg_value::Ptr{Cvoid} + )::cl_int end @checked function clSetKernelExecInfoARM(kernel, param_name, param_value_size, param_value) - @ccall libopencl.clSetKernelExecInfoARM(kernel::cl_kernel, - param_name::cl_kernel_exec_info_arm, - param_value_size::Csize_t, - param_value::Ptr{Cvoid})::cl_int + @ccall libopencl.clSetKernelExecInfoARM( + kernel::cl_kernel, + param_name::cl_kernel_exec_info_arm, + param_value_size::Csize_t, + param_value::Ptr{Cvoid} + )::cl_int end const cl_device_scheduling_controls_capabilities_arm = cl_bitfield @@ -2376,22 +2878,30 @@ const clReleaseAcceleratorINTEL_t = Cvoid # typedef clReleaseAcceleratorINTEL_t * clReleaseAcceleratorINTEL_fn const clReleaseAcceleratorINTEL_fn = Ptr{clReleaseAcceleratorINTEL_t} -function clCreateAcceleratorINTEL(context, accelerator_type, descriptor_size, descriptor, - errcode_ret) - @ccall libopencl.clCreateAcceleratorINTEL(context::cl_context, - accelerator_type::cl_accelerator_type_intel, - descriptor_size::Csize_t, - descriptor::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_accelerator_intel -end - -@checked function clGetAcceleratorInfoINTEL(accelerator, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetAcceleratorInfoINTEL(accelerator::cl_accelerator_intel, - param_name::cl_accelerator_info_intel, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +function clCreateAcceleratorINTEL( + context, accelerator_type, descriptor_size, descriptor, + errcode_ret + ) + return @ccall libopencl.clCreateAcceleratorINTEL( + context::cl_context, + accelerator_type::cl_accelerator_type_intel, + descriptor_size::Csize_t, + descriptor::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_accelerator_intel +end + +@checked function clGetAcceleratorInfoINTEL( + accelerator, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetAcceleratorInfoINTEL( + accelerator::cl_accelerator_intel, + param_name::cl_accelerator_info_intel, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clRetainAcceleratorINTEL(accelerator) @@ -2486,24 +2996,30 @@ const clEnqueueMemAdviseINTEL_t = Cvoid const clEnqueueMemAdviseINTEL_fn = Ptr{clEnqueueMemAdviseINTEL_t} function clHostMemAllocINTEL(context, properties, size, alignment, errcode_ret) - @ccall libopencl.clHostMemAllocINTEL(context::cl_context, - properties::Ptr{cl_mem_properties_intel}, - size::Csize_t, alignment::cl_uint, - errcode_ret::Ptr{cl_int})::Ptr{Cvoid} + return @ccall libopencl.clHostMemAllocINTEL( + context::cl_context, + properties::Ptr{cl_mem_properties_intel}, + size::Csize_t, alignment::cl_uint, + errcode_ret::Ptr{cl_int} + )::Ptr{Cvoid} end function clDeviceMemAllocINTEL(context, device, properties, size, alignment, errcode_ret) - @ccall libopencl.clDeviceMemAllocINTEL(context::cl_context, device::cl_device_id, - properties::Ptr{cl_mem_properties_intel}, - size::Csize_t, alignment::cl_uint, - errcode_ret::Ptr{cl_int})::Ptr{Cvoid} + return @ccall libopencl.clDeviceMemAllocINTEL( + context::cl_context, device::cl_device_id, + properties::Ptr{cl_mem_properties_intel}, + size::Csize_t, alignment::cl_uint, + errcode_ret::Ptr{cl_int} + )::Ptr{Cvoid} end function clSharedMemAllocINTEL(context, device, properties, size, alignment, errcode_ret) - @ccall libopencl.clSharedMemAllocINTEL(context::cl_context, device::cl_device_id, - properties::Ptr{cl_mem_properties_intel}, - size::Csize_t, alignment::cl_uint, - errcode_ret::Ptr{cl_int})::Ptr{Cvoid} + return @ccall libopencl.clSharedMemAllocINTEL( + context::cl_context, device::cl_device_id, + properties::Ptr{cl_mem_properties_intel}, + size::Csize_t, alignment::cl_uint, + errcode_ret::Ptr{cl_int} + )::Ptr{Cvoid} end @checked function clMemFreeINTEL(context, ptr) @@ -2514,48 +3030,66 @@ end @ccall libopencl.clMemBlockingFreeINTEL(context::cl_context, ptr::Ptr{Cvoid})::cl_int end -@checked function clGetMemAllocInfoINTEL(context, ptr, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetMemAllocInfoINTEL(context::cl_context, ptr::Ptr{Cvoid}, - param_name::cl_mem_info_intel, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetMemAllocInfoINTEL( + context, ptr, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetMemAllocInfoINTEL( + context::cl_context, ptr::Ptr{Cvoid}, + param_name::cl_mem_info_intel, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end @checked function clSetKernelArgMemPointerINTEL(kernel, arg_index, arg_value) - @ccall libopencl.clSetKernelArgMemPointerINTEL(kernel::cl_kernel, arg_index::cl_uint, - arg_value::Ptr{Cvoid})::cl_int -end - -@checked function clEnqueueMemFillINTEL(command_queue, dst_ptr, pattern, pattern_size, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueMemFillINTEL(command_queue::cl_command_queue, - dst_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, - pattern_size::Csize_t, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueMemcpyINTEL(command_queue, blocking, dst_ptr, src_ptr, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueMemcpyINTEL(command_queue::cl_command_queue, - blocking::cl_bool, dst_ptr::Ptr{Cvoid}, - src_ptr::Ptr{Cvoid}, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueMemAdviseINTEL(command_queue, ptr, size, advice, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueMemAdviseINTEL(command_queue::cl_command_queue, - ptr::Ptr{Cvoid}, size::Csize_t, - advice::cl_mem_advice_intel, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int + @ccall libopencl.clSetKernelArgMemPointerINTEL( + kernel::cl_kernel, arg_index::cl_uint, + arg_value::Ptr{Cvoid} + )::cl_int +end + +@checked function clEnqueueMemFillINTEL( + command_queue, dst_ptr, pattern, pattern_size, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueMemFillINTEL( + command_queue::cl_command_queue, + dst_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid}, + pattern_size::Csize_t, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueMemcpyINTEL( + command_queue, blocking, dst_ptr, src_ptr, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueMemcpyINTEL( + command_queue::cl_command_queue, + blocking::cl_bool, dst_ptr::Ptr{Cvoid}, + src_ptr::Ptr{Cvoid}, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueMemAdviseINTEL( + command_queue, ptr, size, advice, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueMemAdviseINTEL( + command_queue::cl_command_queue, + ptr::Ptr{Cvoid}, size::Csize_t, + advice::cl_mem_advice_intel, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end # typedef cl_int CL_API_CALL clEnqueueMigrateMemINTEL_t ( cl_command_queue command_queue , const void * ptr , size_t size , cl_mem_migration_flags flags , cl_uint num_events_in_wait_list , const cl_event * event_wait_list , cl_event * event ) @@ -2564,14 +3098,18 @@ const clEnqueueMigrateMemINTEL_t = Cvoid # typedef clEnqueueMigrateMemINTEL_t * clEnqueueMigrateMemINTEL_fn const clEnqueueMigrateMemINTEL_fn = Ptr{clEnqueueMigrateMemINTEL_t} -@checked function clEnqueueMigrateMemINTEL(command_queue, ptr, size, flags, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueMigrateMemINTEL(command_queue::cl_command_queue, - ptr::Ptr{Cvoid}, size::Csize_t, - flags::cl_mem_migration_flags, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueMigrateMemINTEL( + command_queue, ptr, size, flags, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueMigrateMemINTEL( + command_queue::cl_command_queue, + ptr::Ptr{Cvoid}, size::Csize_t, + flags::cl_mem_migration_flags, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end # typedef cl_int CL_API_CALL clEnqueueMemsetINTEL_t ( cl_command_queue command_queue , void * dst_ptr , cl_int value , size_t size , cl_uint num_events_in_wait_list , const cl_event * event_wait_list , cl_event * event ) @@ -2580,13 +3118,17 @@ const clEnqueueMemsetINTEL_t = Cvoid # typedef clEnqueueMemsetINTEL_t * clEnqueueMemsetINTEL_fn const clEnqueueMemsetINTEL_fn = Ptr{clEnqueueMemsetINTEL_t} -@checked function clEnqueueMemsetINTEL(command_queue, dst_ptr, value, size, - num_events_in_wait_list, event_wait_list, event) - @ccall libopencl.clEnqueueMemsetINTEL(command_queue::cl_command_queue, - dst_ptr::Ptr{Cvoid}, value::cl_int, size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueMemsetINTEL( + command_queue, dst_ptr, value, size, + num_events_in_wait_list, event_wait_list, event + ) + @ccall libopencl.clEnqueueMemsetINTEL( + command_queue::cl_command_queue, + dst_ptr::Ptr{Cvoid}, value::cl_int, size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end # typedef cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL_t ( cl_context context , const cl_mem_properties_intel * properties , cl_mem_flags flags , size_t size , void * host_ptr , cl_int * errcode_ret ) @@ -2595,13 +3137,17 @@ const clCreateBufferWithPropertiesINTEL_t = Cvoid # typedef clCreateBufferWithPropertiesINTEL_t * clCreateBufferWithPropertiesINTEL_fn const clCreateBufferWithPropertiesINTEL_fn = Ptr{clCreateBufferWithPropertiesINTEL_t} -function clCreateBufferWithPropertiesINTEL(context, properties, flags, size, host_ptr, - errcode_ret) - @ccall libopencl.clCreateBufferWithPropertiesINTEL(context::cl_context, - properties::Ptr{cl_mem_properties_intel}, - flags::cl_mem_flags, size::Csize_t, - host_ptr::Ptr{Cvoid}, - errcode_ret::Ptr{cl_int})::cl_mem +function clCreateBufferWithPropertiesINTEL( + context, properties, flags, size, host_ptr, + errcode_ret + ) + return @ccall libopencl.clCreateBufferWithPropertiesINTEL( + context::cl_context, + properties::Ptr{cl_mem_properties_intel}, + flags::cl_mem_flags, size::Csize_t, + host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int} + )::cl_mem end # typedef cl_int CL_API_CALL clEnqueueReadHostPipeINTEL_t ( cl_command_queue command_queue , cl_program program , const char * pipe_symbol , cl_bool blocking_read , void * ptr , size_t size , cl_uint num_events_in_wait_list , const cl_event * event_wait_list , cl_event * event ) @@ -2616,32 +3162,40 @@ const clEnqueueWriteHostPipeINTEL_t = Cvoid # typedef clEnqueueWriteHostPipeINTEL_t * clEnqueueWriteHostPipeINTEL_fn const clEnqueueWriteHostPipeINTEL_fn = Ptr{clEnqueueWriteHostPipeINTEL_t} -@checked function clEnqueueReadHostPipeINTEL(command_queue, program, pipe_symbol, - blocking_read, ptr, size, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueReadHostPipeINTEL(command_queue::cl_command_queue, - program::cl_program, - pipe_symbol::Ptr{Cchar}, - blocking_read::cl_bool, ptr::Ptr{Cvoid}, - size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int -end - -@checked function clEnqueueWriteHostPipeINTEL(command_queue, program, pipe_symbol, - blocking_write, ptr, size, - num_events_in_wait_list, event_wait_list, - event) - @ccall libopencl.clEnqueueWriteHostPipeINTEL(command_queue::cl_command_queue, - program::cl_program, - pipe_symbol::Ptr{Cchar}, - blocking_write::cl_bool, ptr::Ptr{Cvoid}, - size::Csize_t, - num_events_in_wait_list::cl_uint, - event_wait_list::Ptr{cl_event}, - event::Ptr{cl_event})::cl_int +@checked function clEnqueueReadHostPipeINTEL( + command_queue, program, pipe_symbol, + blocking_read, ptr, size, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueReadHostPipeINTEL( + command_queue::cl_command_queue, + program::cl_program, + pipe_symbol::Ptr{Cchar}, + blocking_read::cl_bool, ptr::Ptr{Cvoid}, + size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int +end + +@checked function clEnqueueWriteHostPipeINTEL( + command_queue, program, pipe_symbol, + blocking_write, ptr, size, + num_events_in_wait_list, event_wait_list, + event + ) + @ccall libopencl.clEnqueueWriteHostPipeINTEL( + command_queue::cl_command_queue, + program::cl_program, + pipe_symbol::Ptr{Cchar}, + blocking_write::cl_bool, ptr::Ptr{Cvoid}, + size::Csize_t, + num_events_in_wait_list::cl_uint, + event_wait_list::Ptr{cl_event}, + event::Ptr{cl_event} + )::cl_int end const cl_command_queue_capabilities_intel = cl_bitfield @@ -2650,7 +3204,7 @@ struct _cl_queue_family_properties_intel properties::cl_command_queue_properties capabilities::cl_command_queue_capabilities_intel count::cl_uint - name::NTuple{64,Cchar} + name::NTuple{64, Cchar} end const cl_queue_family_properties_intel = _cl_queue_family_properties_intel @@ -2663,18 +3217,22 @@ const clGetImageRequirementsInfoEXT_t = Cvoid # typedef clGetImageRequirementsInfoEXT_t * clGetImageRequirementsInfoEXT_fn const clGetImageRequirementsInfoEXT_fn = Ptr{clGetImageRequirementsInfoEXT_t} -@checked function clGetImageRequirementsInfoEXT(context, properties, flags, image_format, - image_desc, param_name, param_value_size, - param_value, param_value_size_ret) - @ccall libopencl.clGetImageRequirementsInfoEXT(context::cl_context, - properties::Ptr{cl_mem_properties}, - flags::cl_mem_flags, - image_format::Ptr{cl_image_format}, - image_desc::Ptr{cl_image_desc}, - param_name::cl_image_requirements_info_ext, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetImageRequirementsInfoEXT( + context, properties, flags, image_format, + image_desc, param_name, param_value_size, + param_value, param_value_size_ret + ) + @ccall libopencl.clGetImageRequirementsInfoEXT( + context::cl_context, + properties::Ptr{cl_mem_properties}, + flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_desc::Ptr{cl_image_desc}, + param_name::cl_image_requirements_info_ext, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end const cl_icdl_info = cl_uint @@ -2685,12 +3243,16 @@ const clGetICDLoaderInfoOCLICD_t = Cvoid # typedef clGetICDLoaderInfoOCLICD_t * clGetICDLoaderInfoOCLICD_fn const clGetICDLoaderInfoOCLICD_fn = Ptr{clGetICDLoaderInfoOCLICD_t} -@checked function clGetICDLoaderInfoOCLICD(param_name, param_value_size, param_value, - param_value_size_ret) - @ccall libopencl.clGetICDLoaderInfoOCLICD(param_name::cl_icdl_info, - param_value_size::Csize_t, - param_value::Ptr{Cvoid}, - param_value_size_ret::Ptr{Csize_t})::cl_int +@checked function clGetICDLoaderInfoOCLICD( + param_name, param_value_size, param_value, + param_value_size_ret + ) + @ccall libopencl.clGetICDLoaderInfoOCLICD( + param_name::cl_icdl_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t} + )::cl_int end const cl_device_fp_atomic_capabilities_ext = cl_bitfield @@ -2702,8 +3264,10 @@ const clSetContentSizeBufferPoCL_t = Cvoid const clSetContentSizeBufferPoCL_fn = Ptr{clSetContentSizeBufferPoCL_t} @checked function clSetContentSizeBufferPoCL(buffer, content_size_buffer) - @ccall libopencl.clSetContentSizeBufferPoCL(buffer::cl_mem, - content_size_buffer::cl_mem)::cl_int + @ccall libopencl.clSetContentSizeBufferPoCL( + buffer::cl_mem, + content_size_buffer::cl_mem + )::cl_int end const cl_device_kernel_clock_capabilities_khr = cl_bitfield @@ -2715,8 +3279,10 @@ const clCancelCommandsIMG_t = Cvoid const clCancelCommandsIMG_fn = Ptr{clCancelCommandsIMG_t} @checked function clCancelCommandsIMG(event_list, num_events_in_list) - @ccall libopencl.clCancelCommandsIMG(event_list::Ptr{cl_event}, - num_events_in_list::Csize_t)::cl_int + @ccall libopencl.clCancelCommandsIMG( + event_list::Ptr{cl_event}, + num_events_in_list::Csize_t + )::cl_int end const CL_NAME_VERSION_MAX_NAME_SIZE = 64 diff --git a/lib/cl/memory.jl b/lib/cl/memory.jl index d298a149..716b4801 100644 --- a/lib/cl/memory.jl +++ b/lib/cl/memory.jl @@ -1,6 +1,6 @@ # Raw memory management -export device_alloc, host_alloc, shared_alloc, free#, properties, lookup_alloc +export device_alloc, host_alloc, shared_alloc, free #, properties, lookup_alloc # # untyped buffers @@ -8,25 +8,25 @@ export device_alloc, host_alloc, shared_alloc, free#, properties, lookup_alloc abstract type AbstractBuffer end -Base.convert(T::Type{<:Union{Ptr,CLPtr}}, buf::AbstractBuffer) = +Base.convert(T::Type{<:Union{Ptr, CLPtr}}, buf::AbstractBuffer) = throw(ArgumentError("Illegal conversion of a $(typeof(buf)) to a $T")) # ccall integration # # taking the pointer of a buffer means returning the underlying pointer, # and not the pointer of the buffer object itself. -Base.unsafe_convert(P::Type{<:Union{Ptr,CLPtr}}, buf::AbstractBuffer) = convert(P, buf) +Base.unsafe_convert(P::Type{<:Union{Ptr, CLPtr}}, buf::AbstractBuffer) = convert(P, buf) function free(buf::AbstractBuffer; blocking = false) ctx = context(buf) freefun = if blocking ext_clMemBlockingFreeINTEL - else + else ext_clMemFreeINTEL end success = freefun(ctx, Ptr{Nothing}(UInt(buf.ptr))) - @assert success == CL_SUCCESS - return success + @assert success == CL_SUCCESS + return success end ## device buffer @@ -44,23 +44,25 @@ struct DeviceBuffer <: AbstractBuffer device::Device end -function device_alloc(ctx::Context, dev::Device, bytesize::Integer; - alignment::Integer=0, error_code::Ref{Int32}=Ref{Int32}(), properties::Tuple{Vararg{Symbol}}=()) - flags = 0 - if !isempty(properties) - for i in properties - if i == :wc - flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL - else - @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`, `:ipd`, and `:iph`" - end - end - end - - ptr = ext_clDeviceMemAllocINTEL(ctx, dev, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) - - @assert error_code[] == CL_SUCCESS - #= +function device_alloc( + ctx::Context, dev::Device, bytesize::Integer; + alignment::Integer = 0, error_code::Ref{Int32} = Ref{Int32}(), properties::Tuple{Vararg{Symbol}} = () + ) + flags = 0 + if !isempty(properties) + for i in properties + if i == :wc + flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL + else + @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`, `:ipd`, and `:iph`" + end + end + end + + ptr = ext_clDeviceMemAllocINTEL(ctx, dev, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) + + @assert error_code[] == CL_SUCCESS + #= @info ptr error_code[] result = Ref{UInt64}() @warn result @@ -70,7 +72,7 @@ function device_alloc(ctx::Context, dev::Device, bytesize::Integer; @error success result @assert success == CL_SUCCESS =# - return DeviceBuffer(reinterpret(CLPtr{Cvoid}, ptr), bytesize, ctx, dev) + return DeviceBuffer(reinterpret(CLPtr{Cvoid}, ptr), bytesize, ctx, dev) end Base.pointer(buf::DeviceBuffer) = buf.ptr @@ -99,23 +101,25 @@ struct HostBuffer <: AbstractBuffer context::Context end -function host_alloc(ctx::Context, bytesize::Integer; - alignment::Integer=0, error_code::Ref{Int32}=Ref{Int32}(), properties::Tuple{Vararg{Symbol}}=()) - flags = 0 - if !isempty(properties) - for i in properties - if i == :wc - flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL - else - @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`" - end - end - end - - ptr = ext_clHostMemAllocINTEL(ctx, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) - - @assert error_code[] == CL_SUCCESS - #= +function host_alloc( + ctx::Context, bytesize::Integer; + alignment::Integer = 0, error_code::Ref{Int32} = Ref{Int32}(), properties::Tuple{Vararg{Symbol}} = () + ) + flags = 0 + if !isempty(properties) + for i in properties + if i == :wc + flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL + else + @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`" + end + end + end + + ptr = ext_clHostMemAllocINTEL(ctx, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) + + @assert error_code[] == CL_SUCCESS + #= @info ptr error_code[] result = Ref{UInt64}() @warn result @@ -125,7 +129,7 @@ function host_alloc(ctx::Context, bytesize::Integer; @error success result @assert success == CL_SUCCESS =# - return HostBuffer(ptr, bytesize, ctx) + return HostBuffer(ptr, bytesize, ctx) end #= @@ -166,33 +170,35 @@ struct SharedBuffer <: AbstractBuffer ptr::CLPtr{Cvoid} bytesize::Int context::Context - device::Union{Nothing,Device} + device::Union{Nothing, Device} end -function shared_alloc(ctx::Context, dev::Device, bytesize::Integer; - alignment::Integer=0, error_code::Ref{Int32}=Ref{Int32}(), properties::Tuple{Vararg{Symbol}}=()) - flags = 0 - if !isempty(properties) - if (:ipd in properties) && (:iph in properties) - error("`properties` contains both `:ipd` and `:iph`, these flags are mutually exclusive.") - end - for i in properties - if i == :wc - flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL - elseif i == :ipd - flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL - elseif i == :iph - flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL - else - @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`, `:ipd`, and `:iph`" - end - end - end - - ptr = ext_clSharedMemAllocINTEL(ctx, dev, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) - - @assert error_code[] == CL_SUCCESS - #= +function shared_alloc( + ctx::Context, dev::Device, bytesize::Integer; + alignment::Integer = 0, error_code::Ref{Int32} = Ref{Int32}(), properties::Tuple{Vararg{Symbol}} = () + ) + flags = 0 + if !isempty(properties) + if (:ipd in properties) && (:iph in properties) + error("`properties` contains both `:ipd` and `:iph`, these flags are mutually exclusive.") + end + for i in properties + if i == :wc + flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL + elseif i == :ipd + flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL + elseif i == :iph + flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL + else + @warn "$i not recognized, ignoring flag. Valid optinos include `:wc`, `:ipd`, and `:iph`" + end + end + end + + ptr = ext_clSharedMemAllocINTEL(ctx, dev, cl_mem_properties_intel[CL_MEM_ALLOC_FLAGS_INTEL, flags, 0], bytesize, alignment, error_code) + + @assert error_code[] == CL_SUCCESS + #= @info ptr error_code[] result = Ref{UInt64}() @warn result @@ -202,7 +208,7 @@ function shared_alloc(ctx::Context, dev::Device, bytesize::Integer; @error success result @assert success == CL_SUCCESS =# - return SharedBuffer(reinterpret(CLPtr{Cvoid}, ptr), bytesize, ctx, dev) + return SharedBuffer(reinterpret(CLPtr{Cvoid}, ptr), bytesize, ctx, dev) end #= @@ -299,22 +305,26 @@ function lookup_alloc(ctx::Context, ptr::Union{Ptr,CLPtr}) end =# -function enqueue_usm_memcpy(dst::Union{CLPtr, Ptr}, src::Union{CLPtr, Ptr}, nbytes::Integer; queu::CmdQueue=queue(), blocking::Bool=false, - wait_for::Vector{Event}=Event[]) - n_evts = length(wait_for) +function enqueue_usm_memcpy( + dst::Union{CLPtr, Ptr}, src::Union{CLPtr, Ptr}, nbytes::Integer; queu::CmdQueue = queue(), blocking::Bool = false, + wait_for::Vector{Event} = Event[] + ) + n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] - GC.@preserve wait_for begin + return GC.@preserve wait_for begin ret_evt = Ref{cl_event}() ext_clEnqueueMemcpyINTEL(queu, blocking, dst, src, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] end end -function enqueue_usm_memfill(dst::Union{CLPtr, Ptr}, pattern::Union{Ptr{T},CLPtr{T}}, pattern_size::Integer, nbytes::Integer; queu::CmdQueue=queue(), - wait_for::Vector{Event}=Event[]) where T - n_evts = length(wait_for) +function enqueue_usm_memfill( + dst::Union{CLPtr, Ptr}, pattern::Union{Ptr{T}, CLPtr{T}}, pattern_size::Integer, nbytes::Integer; queu::CmdQueue = queue(), + wait_for::Vector{Event} = Event[] + ) where {T} + n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] - GC.@preserve wait_for begin + return GC.@preserve wait_for begin ret_evt = Ref{cl_event}() ext_clEnqueueMemFillINTEL(queu, dst, pattern, pattern_size, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] diff --git a/lib/cl/platform.jl b/lib/cl/platform.jl index 52065858..d96644f1 100644 --- a/lib/cl/platform.jl +++ b/lib/cl/platform.jl @@ -53,8 +53,8 @@ function Base.show(io::IO, p::Platform) strip_extra_whitespace = r"\s+" platform_name = replace(p.name, strip_extra_whitespace => " ") ptr_val = convert(UInt, pointer(p)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" - print(io, "OpenCL.Platform('$platform_name' @$ptr_address)") + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" + return print(io, "OpenCL.Platform('$platform_name' @$ptr_address)") end function platforms() @@ -100,10 +100,10 @@ end devices(p::Platform) = devices(p, CL_DEVICE_TYPE_ALL) function devices(p::Platform, dtype::Symbol) - devices(p, cl_device_type(dtype)) + return devices(p, cl_device_type(dtype)) end has_device_type(p::Platform, dtype) = length(devices(p, dtype)) > 0 -available_devices(p::Platform, dtype::Symbol) = filter(d -> d.available, devices(p, dtype)) +available_devices(p::Platform, dtype::Symbol) = filter(d -> d.available, devices(p, dtype)) available_devices(p::Platform) = available_devices(p, :all) diff --git a/lib/cl/pointer.jl b/lib/cl/pointer.jl index 048e7679..fdf0222d 100644 --- a/lib/cl/pointer.jl +++ b/lib/cl/pointer.jl @@ -23,7 +23,7 @@ else end # constructor -CLPtr{T}(x::Union{Int,UInt,CLPtr}) where {T} = Base.bitcast(CLPtr{T}, x) +CLPtr{T}(x::Union{Int, UInt, CLPtr}) where {T} = Base.bitcast(CLPtr{T}, x) const CL_NULL = CLPtr{Cvoid}(0) @@ -37,10 +37,10 @@ Base.eltype(::Type{<:CLPtr{T}}) where {T} = T # to and from integers ## pointer to integer -Base.convert(::Type{T}, x::CLPtr) where {T<:Integer} = T(UInt(x)) +Base.convert(::Type{T}, x::CLPtr) where {T <: Integer} = T(UInt(x)) ## integer to pointer -Base.convert(::Type{CLPtr{T}}, x::Union{Int,UInt}) where {T} = CLPtr{T}(x) -Int(x::CLPtr) = Base.bitcast(Int, x) +Base.convert(::Type{CLPtr{T}}, x::Union{Int, UInt}) where {T} = CLPtr{T}(x) +Int(x::CLPtr) = Base.bitcast(Int, x) UInt(x::CLPtr) = Base.bitcast(UInt, x) # between regular and OpenCL pointers @@ -54,10 +54,10 @@ Base.convert(::Type{CLPtr{T}}, p::CLPtr) where {T} = Base.bitcast(CLPtr{T}, p) Base.cconvert(::Type{<:CLPtr}, x) = x # fallback for unsafe_convert -Base.unsafe_convert(::Type{P}, x::CLPtr) where {P<:CLPtr} = convert(P, x) +Base.unsafe_convert(::Type{P}, x::CLPtr) where {P <: CLPtr} = convert(P, x) # from arrays -Base.unsafe_convert(::Type{CLPtr{S}}, a::AbstractArray{T}) where {S,T} = +Base.unsafe_convert(::Type{CLPtr{S}}, a::AbstractArray{T}) where {S, T} = convert(CLPtr{S}, Base.unsafe_convert(CLPtr{T}, a)) Base.unsafe_convert(::Type{CLPtr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))") @@ -68,15 +68,14 @@ Base.isequal(x::CLPtr, y::CLPtr) = (x === y) Base.isless(x::CLPtr{T}, y::CLPtr{T}) where {T} = x < y Base.:(==)(x::CLPtr, y::CLPtr) = UInt(x) == UInt(y) -Base.:(<)(x::CLPtr, y::CLPtr) = UInt(x) < UInt(y) -Base.:(-)(x::CLPtr, y::CLPtr) = UInt(x) - UInt(y) +Base.:(<)(x::CLPtr, y::CLPtr) = UInt(x) < UInt(y) +Base.:(-)(x::CLPtr, y::CLPtr) = UInt(x) - UInt(y) Base.:(+)(x::CLPtr, y::Integer) = oftype(x, Base.add_ptr(UInt(x), (y % UInt) % UInt)) Base.:(-)(x::CLPtr, y::Integer) = oftype(x, Base.sub_ptr(UInt(x), (y % UInt) % UInt)) Base.:(+)(x::Integer, y::CLPtr) = y + x - # # Host or device pointer # @@ -115,11 +114,15 @@ function Base.cconvert(::Type{PtrOrCLPtr{T}}, val) where {T} end function Base.unsafe_convert(::Type{PtrOrCLPtr{T}}, val) where {T} - ptr = if Core.Compiler.return_type(Base.unsafe_convert, - Tuple{Type{Ptr{T}}, typeof(val)}) !== Union{} + ptr = if Core.Compiler.return_type( + Base.unsafe_convert, + Tuple{Type{Ptr{T}}, typeof(val)} + ) !== Union{} Base.unsafe_convert(Ptr{T}, val) - elseif Core.Compiler.return_type(Base.unsafe_convert, - Tuple{Type{CLPtr{T}}, typeof(val)}) !== Union{} + elseif Core.Compiler.return_type( + Base.unsafe_convert, + Tuple{Type{CLPtr{T}}, typeof(val)} + ) !== Union{} Base.unsafe_convert(CLPtr{T}, val) else throw(ArgumentError("cannot convert to either a host or device pointer")) @@ -157,16 +160,16 @@ Base.convert(::Type{CLRef{T}}, x) where {T} = CLRef{T}(x) ## CLRef object backed by an array at index i -struct CLRefArray{T,A<:AbstractArray{T}} <: Ref{T} +struct CLRefArray{T, A <: AbstractArray{T}} <: Ref{T} x::A i::Int - CLRefArray{T,A}(x,i) where {T,A<:AbstractArray{T}} = new(x,i) + CLRefArray{T, A}(x, i) where {T, A <: AbstractArray{T}} = new(x, i) end -CLRefArray{T}(x::AbstractArray{T}, i::Int=1) where {T} = CLRefArray{T,typeof(x)}(x, i) -CLRefArray(x::AbstractArray{T}, i::Int=1) where {T} = CLRefArray{T}(x, i) +CLRefArray{T}(x::AbstractArray{T}, i::Int = 1) where {T} = CLRefArray{T, typeof(x)}(x, i) +CLRefArray(x::AbstractArray{T}, i::Int = 1) where {T} = CLRefArray{T}(x, i) Base.convert(::Type{CLRef{T}}, x::AbstractArray{T}) where {T} = CLRefArray(x, 1) -function Base.unsafe_convert(P::Type{CLPtr{T}}, b::CLRefArray{T}) where T +function Base.unsafe_convert(P::Type{CLPtr{T}}, b::CLRefArray{T}) where {T} return pointer(b.x, b.i) end function Base.unsafe_convert(P::Type{CLPtr{Any}}, b::CLRefArray{Any}) @@ -205,7 +208,5 @@ Base.unsafe_convert(::Type{RefOrCLRef{T}}, x::CLRefs{T}) where {T} = # support conversion from arrays Base.convert(::Type{RefOrCLRef{T}}, x::Array{T}) where {T} = convert(Ref{T}, x) Base.convert(::Type{RefOrCLRef{T}}, x::AbstractArray{T}) where {T} = convert(CLRef{T}, x) -Base.unsafe_convert(P::Type{RefOrCLRef{T}}, b::CLRefArray{T}) where T = +Base.unsafe_convert(P::Type{RefOrCLRef{T}}, b::CLRefArray{T}) where {T} = Base.bitcast(RefOrCLRef{T}, Base.unsafe_convert(CLRef{T}, b)) - - diff --git a/lib/cl/program.jl b/lib/cl/program.jl index 17c9f370..10fce893 100644 --- a/lib/cl/program.jl +++ b/lib/cl/program.jl @@ -5,7 +5,7 @@ using Printf mutable struct Program <: CLObject const id::cl_program - function Program(program_id::cl_program; retain::Bool=false) + function Program(program_id::cl_program; retain::Bool = false) p = new(program_id) retain && clRetainProgram(p) finalizer(clReleaseProgram, p) @@ -15,13 +15,13 @@ end Base.show(io::IO, p::Program) = begin ptr_val = convert(UInt, pointer(p)) - ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE>>2))" + ptr_address = "0x$(string(ptr_val, base = 16, pad = Sys.WORD_SIZE >> 2))" print(io, "OpenCL.Program(@$ptr_address)") end Base.unsafe_convert(::Type{cl_program}, p::Program) = p.id -function Program(; source=nothing, binaries=nothing, il=nothing) +function Program(; source = nothing, binaries = nothing, il = nothing) if count(!isnothing, (source, binaries, il)) != 1 throw(ArgumentError("Program must be source, binary, or intermediate language")) end @@ -45,7 +45,7 @@ function Program(; source=nothing, binaries=nothing, il=nothing) device_ids = Vector{cl_device_id}(undef, ndevices) bin_lengths = Vector{Csize_t}(undef, ndevices) binary_status = Vector{Cint}(undef, ndevices) - binary_ptrs= Vector{Ptr{UInt8}}(undef, ndevices) + binary_ptrs = Vector{Ptr{UInt8}}(undef, ndevices) try for (i, (dev, bin)) in enumerate(binaries) device_ids[i] = dev.id @@ -53,8 +53,10 @@ function Program(; source=nothing, binaries=nothing, il=nothing) binary_ptrs[i] = Base.unsafe_convert(Ptr{UInt8}, pointer(bin)) end err_code = Ref{Cint}() - program_id = clCreateProgramWithBinary(context(), ndevices, device_ids, bin_lengths, - binary_ptrs, binary_status, err_code) + program_id = clCreateProgramWithBinary( + context(), ndevices, device_ids, bin_lengths, + binary_ptrs, binary_status, err_code + ) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) end @@ -67,11 +69,11 @@ function Program(; source=nothing, binaries=nothing, il=nothing) throw(err) end end - Program(program_id) + return Program(program_id) end #TODO: build callback... -function build!(p::Program; options="") +function build!(p::Program; options = "") opts = String(options) ndevices = 0 device_ids = C_NULL @@ -87,7 +89,7 @@ function build!(p::Program; options="") if p.source !== nothing println(io) println(io, "Source code:") - for (i,line) in enumerate(split(p.source, "\n")) + for (i, line) in enumerate(split(p.source, "\n")) println(io, @sprintf("%s%-2d: %s", " ", i, line)) end end @@ -156,7 +158,7 @@ function Base.getproperty(p::Program, s::Symbol) elseif s == :context ctx = Ref{cl_context}() clGetProgramInfo(p, CL_PROGRAM_CONTEXT, sizeof(cl_context), ctx, C_NULL) - return Context(ctx[], retain=true) + return Context(ctx[], retain = true) elseif s == :build_status status_dict = Dict{Device, cl_build_status}() for device in p.devices diff --git a/lib/cl/state.jl b/lib/cl/state.jl index 78d04ad5..63571755 100644 --- a/lib/cl/state.jl +++ b/lib/cl/state.jl @@ -1,7 +1,7 @@ ## platform selection function platform() - get!(task_local_storage(), :CLPlatform) do + return get!(task_local_storage(), :CLPlatform) do ps = platforms() if isempty(ps) throw(ArgumentError("No OpenCL platforms found")) @@ -50,7 +50,7 @@ end ## device selection function device() - get!(task_local_storage(), :CLDevice) do + return get!(task_local_storage(), :CLDevice) do dev = default_device(platform()) isnothing(dev) && throw(ArgumentError("No OpenCL devices found")) dev @@ -69,7 +69,7 @@ end function device!(dtype::Symbol) dev = devices(platform(), dtype) isempty(dev) && throw(ArgumentError("No OpenCL devices found of type $dtype")) - device!(first(dev)) + return device!(first(dev)) end @@ -79,7 +79,7 @@ end const context_lock = ReentrantLock() const device_contexts = Dict{Device, Context}() function context() - get!(task_local_storage(), :CLContext) do + return get!(task_local_storage(), :CLContext) do @lock context_lock begin dev = device() get!(device_contexts, dev) do @@ -95,7 +95,7 @@ end function device!(f::Base.Callable, args...) old = device() device!(args...) - try + return try f() finally device!(old) @@ -106,15 +106,16 @@ end ## per-task queues # XXX: port CUDA.jl's per-array stream tracking, obviating the need for global sync -const queues = WeakKeyDict{cl.CmdQueue,Nothing}() +const queues = WeakKeyDict{cl.CmdQueue, Nothing}() function device_synchronize() for queue in keys(queues) cl.finish(queue) end + return end function queue() - get!(task_local_storage(), :CLQueue) do + return get!(task_local_storage(), :CLQueue) do q = CmdQueue() task_local_storage(:CLQueue, q) queues[q] = nothing @@ -139,7 +140,7 @@ end function queue!(f::Base.Callable, args...) old = queue() queue!(args...) - try + return try f() finally queue!(old) diff --git a/lib/intrinsics/src/SPIRVIntrinsics.jl b/lib/intrinsics/src/SPIRVIntrinsics.jl index 5de144eb..2f7a75f3 100644 --- a/lib/intrinsics/src/SPIRVIntrinsics.jl +++ b/lib/intrinsics/src/SPIRVIntrinsics.jl @@ -26,7 +26,7 @@ include("atomic.jl") macro import_all() code = quote end - for name in names(SPIRVIntrinsics; all=true) + for name in names(SPIRVIntrinsics; all = true) # bring all the names of this module in scope name in (:SPIRVIntrinsics, :eval, :include) && continue startswith(string(name), "#") && continue diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl index 144026fe..95fecc77 100644 --- a/lib/intrinsics/src/atomic.jl +++ b/lib/intrinsics/src/atomic.jl @@ -14,72 +14,93 @@ const atomic_memory_types = [AS.Local, AS.Global] # generically typed for gentype in atomic_integer_types, as in atomic_memory_types -@eval begin + @eval begin + + @device_function atomic_add!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_add", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_sub!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_sub", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_inc!(p::LLVMPtr{$gentype, $as}) = + @builtin_ccall("atomic_inc", $gentype, (LLVMPtr{$gentype, $as},), p) + + @device_function atomic_dec!(p::LLVMPtr{$gentype, $as}) = + @builtin_ccall("atomic_dec", $gentype, (LLVMPtr{$gentype, $as},), p) + + @device_function atomic_min!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_min", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_max!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_max", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_and!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_and", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_or!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_or", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_xor!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_xor", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_xchg!(p::LLVMPtr{$gentype, $as}, val::$gentype) = + @builtin_ccall( + "atomic_xchg", $gentype, + (LLVMPtr{$gentype, $as}, $gentype), p, val + ) + + @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype, $as}, cmp::$gentype, val::$gentype) = + @builtin_ccall( + "atomic_cmpxchg", $gentype, + (LLVMPtr{$gentype, $as}, $gentype, $gentype), p, cmp, val + ) -@device_function atomic_add!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_add", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_sub!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_sub", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_inc!(p::LLVMPtr{$gentype,$as}) = - @builtin_ccall("atomic_inc", $gentype, (LLVMPtr{$gentype,$as},), p) - -@device_function atomic_dec!(p::LLVMPtr{$gentype,$as}) = - @builtin_ccall("atomic_dec", $gentype, (LLVMPtr{$gentype,$as},), p) - -@device_function atomic_min!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_min", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_max!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_max", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_and!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_and", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_or!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_or", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_xor!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_xor", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_xchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) = - @builtin_ccall("atomic_cmpxchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val) - -end + end end # specifically typed for as in atomic_memory_types -@eval begin - -@device_function atomic_xchg!(p::LLVMPtr{Float32,$as}, val::Float32) = - @builtin_ccall("atomic_xchg", Float32, (LLVMPtr{Float32,$as}, Float32,), p, val) - -# XXX: why is only xchg supported on floats? isn't it safe for cmpxchg too, -# which should only perform bitwise comparisons? -@device_function atomic_cmpxchg!(p::LLVMPtr{Float32,$as}, cmp::Float32, val::Float32) = - reinterpret(Float32, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt32,$as}, p), - reinterpret(UInt32, cmp), - reinterpret(UInt32, val))) + @eval begin + + @device_function atomic_xchg!(p::LLVMPtr{Float32, $as}, val::Float32) = + @builtin_ccall("atomic_xchg", Float32, (LLVMPtr{Float32, $as}, Float32), p, val) + + # XXX: why is only xchg supported on floats? isn't it safe for cmpxchg too, + # which should only perform bitwise comparisons? + @device_function atomic_cmpxchg!(p::LLVMPtr{Float32, $as}, cmp::Float32, val::Float32) = + reinterpret( + Float32, atomic_cmpxchg!( + reinterpret(LLVMPtr{UInt32, $as}, p), + reinterpret(UInt32, cmp), + reinterpret(UInt32, val) + ) + ) + end end -end - # documentation @@ -161,7 +182,6 @@ returns `old`. atomic_xor! - # # High-level interface # @@ -229,9 +249,11 @@ macro atomic(ex) array = ref.args[1] indices = Expr(:tuple, ref.args[2:end]...) - esc(quote - $atomic_arrayset($array, $indices, $op, $val) - end) + return esc( + quote + $atomic_arrayset($array, $indices, $op, $val) + end + ) end # FIXME: make this respect the indexing style @@ -239,15 +261,19 @@ end atomic_arrayset(A, Base._to_linear_index(A, Is...), op, convert(T, val)) # native atomics -for (op,impl) in [(+) => atomic_add!, - (-) => atomic_sub!, - (&) => atomic_and!, - (|) => atomic_or!, - (⊻) => atomic_xor!, - Base.max => atomic_max!, - Base.min => atomic_min!] - @eval @inline atomic_arrayset(A::AbstractArray{T}, I::Integer, ::typeof($op), - val::T) where {T <: Union{Int32,UInt32}} = +for (op, impl) in [ + (+) => atomic_add!, + (-) => atomic_sub!, + (&) => atomic_and!, + (|) => atomic_or!, + (⊻) => atomic_xor!, + Base.max => atomic_max!, + Base.min => atomic_min!, + ] + @eval @inline atomic_arrayset( + A::AbstractArray{T}, I::Integer, ::typeof($op), + val::T + ) where {T <: Union{Int32, UInt32}} = $impl(pointer(A, I), val) end @@ -261,4 +287,5 @@ function atomic_arrayset(A::AbstractArray{T}, I::Integer, op::Function, val) whe old = atomic_cmpxchg!(ptr, cmp, new) (old == cmp) && return new end + return end diff --git a/lib/intrinsics/src/integer.jl b/lib/intrinsics/src/integer.jl index 7e36f02c..634ae0ba 100644 --- a/lib/intrinsics/src/integer.jl +++ b/lib/intrinsics/src/integer.jl @@ -7,39 +7,39 @@ const generic_integer_types = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, # generically typed for gentype in generic_integer_types -@eval begin + @eval begin -@device_override Base.abs(x::$gentype) = @builtin_ccall("abs", $gentype, ($gentype,), x) -@device_function abs_diff(x::$gentype, y::$gentype) = @builtin_ccall("abs_diff", $gentype, ($gentype, $gentype), x, y) + @device_override Base.abs(x::$gentype) = @builtin_ccall("abs", $gentype, ($gentype,), x) + @device_function abs_diff(x::$gentype, y::$gentype) = @builtin_ccall("abs_diff", $gentype, ($gentype, $gentype), x, y) -@device_function add_sat(x::$gentype, y::$gentype) = @builtin_ccall("add_sat", $gentype, ($gentype, $gentype), x, y) -@device_function hadd(x::$gentype, y::$gentype) = @builtin_ccall("hadd", $gentype, ($gentype, $gentype), x, y) -@device_function rhadd(x::$gentype, y::$gentype) = @builtin_ccall("rhadd", $gentype, ($gentype, $gentype), x, y) + @device_function add_sat(x::$gentype, y::$gentype) = @builtin_ccall("add_sat", $gentype, ($gentype, $gentype), x, y) + @device_function hadd(x::$gentype, y::$gentype) = @builtin_ccall("hadd", $gentype, ($gentype, $gentype), x, y) + @device_function rhadd(x::$gentype, y::$gentype) = @builtin_ccall("rhadd", $gentype, ($gentype, $gentype), x, y) -@device_override Base.clamp(x::$gentype, minval::$gentype, maxval::$gentype) = @builtin_ccall("clamp", $gentype, ($gentype, $gentype, $gentype), x, minval, maxval) + @device_override Base.clamp(x::$gentype, minval::$gentype, maxval::$gentype) = @builtin_ccall("clamp", $gentype, ($gentype, $gentype, $gentype), x, minval, maxval) -@device_function clz(x::$gentype) = @builtin_ccall("clz", $gentype, ($gentype,), x) -@device_function ctz(x::$gentype) = @builtin_ccall("ctz", $gentype, ($gentype,), x) + @device_function clz(x::$gentype) = @builtin_ccall("clz", $gentype, ($gentype,), x) + @device_function ctz(x::$gentype) = @builtin_ccall("ctz", $gentype, ($gentype,), x) -@device_function mad_hi(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad_hi", $gentype, ($gentype, $gentype, $gentype), a, b, c) -@device_function mad_sat(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad_sat", $gentype, ($gentype, $gentype, $gentype), a, b, c) + @device_function mad_hi(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad_hi", $gentype, ($gentype, $gentype, $gentype), a, b, c) + @device_function mad_sat(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad_sat", $gentype, ($gentype, $gentype, $gentype), a, b, c) -# XXX: these definitions introduce ambiguities -#@device_override Base.max(x::$gentype, y::$gentype) = @builtin_ccall("max", $gentype, ($gentype, $gentype), x, y) -#@device_override Base.min(x::$gentype, y::$gentype) = @builtin_ccall("min", $gentype, ($gentype, $gentype), x, y) + # XXX: these definitions introduce ambiguities + #@device_override Base.max(x::$gentype, y::$gentype) = @builtin_ccall("max", $gentype, ($gentype, $gentype), x, y) + #@device_override Base.min(x::$gentype, y::$gentype) = @builtin_ccall("min", $gentype, ($gentype, $gentype), x, y) -@device_function mul_hi(x::$gentype, y::$gentype) = @builtin_ccall("mul_hi", $gentype, ($gentype, $gentype), x, y) + @device_function mul_hi(x::$gentype, y::$gentype) = @builtin_ccall("mul_hi", $gentype, ($gentype, $gentype), x, y) -@device_function rotate(v::$gentype, i::$gentype) = @builtin_ccall("rotate", $gentype, ($gentype, $gentype), v, i) + @device_function rotate(v::$gentype, i::$gentype) = @builtin_ccall("rotate", $gentype, ($gentype, $gentype), v, i) -@device_function sub_sat(x::$gentype, y::$gentype) = @builtin_ccall("sub_sat", $gentype, ($gentype, $gentype), x, y) + @device_function sub_sat(x::$gentype, y::$gentype) = @builtin_ccall("sub_sat", $gentype, ($gentype, $gentype), x, y) -@device_function popcount(x::$gentype) = @builtin_ccall("popcount", $gentype, ($gentype,), x) + @device_function popcount(x::$gentype) = @builtin_ccall("popcount", $gentype, ($gentype,), x) -@device_function mad24(x::$gentype, y::$gentype, z::$gentype) = @builtin_ccall("mad24", $gentype, ($gentype, $gentype, $gentype), x, y, z) -@device_function mul24(x::$gentype, y::$gentype) = @builtin_ccall("mul24", $gentype, ($gentype, $gentype), x, y) + @device_function mad24(x::$gentype, y::$gentype, z::$gentype) = @builtin_ccall("mad24", $gentype, ($gentype, $gentype, $gentype), x, y, z) + @device_function mul24(x::$gentype, y::$gentype) = @builtin_ccall("mul24", $gentype, ($gentype, $gentype), x, y) -end + end end diff --git a/lib/intrinsics/src/math.jl b/lib/intrinsics/src/math.jl index 1e4c2a97..10e86357 100644 --- a/lib/intrinsics/src/math.jl +++ b/lib/intrinsics/src/math.jl @@ -1,7 +1,7 @@ # Math Functions # TODO: vector types -const generic_types = [Float32,Float64] +const generic_types = [Float32, Float64] const generic_types_float = [Float32] const generic_types_double = [Float64] @@ -9,110 +9,110 @@ const generic_types_double = [Float64] # generically typed for gentype in generic_types -@eval begin + @eval begin -@device_override Base.acos(x::$gentype) = @builtin_ccall("acos", $gentype, ($gentype,), x) -@device_override Base.acosh(x::$gentype) = @builtin_ccall("acosh", $gentype, ($gentype,), x) -@device_function acospi(x::$gentype) = @builtin_ccall("acospi", $gentype, ($gentype,), x) + @device_override Base.acos(x::$gentype) = @builtin_ccall("acos", $gentype, ($gentype,), x) + @device_override Base.acosh(x::$gentype) = @builtin_ccall("acosh", $gentype, ($gentype,), x) + @device_function acospi(x::$gentype) = @builtin_ccall("acospi", $gentype, ($gentype,), x) -@device_override Base.asin(x::$gentype) = @builtin_ccall("asin", $gentype, ($gentype,), x) -@device_override Base.asinh(x::$gentype) = @builtin_ccall("asinh", $gentype, ($gentype,), x) -@device_function asinpi(x::$gentype) = @builtin_ccall("asinpi", $gentype, ($gentype,), x) + @device_override Base.asin(x::$gentype) = @builtin_ccall("asin", $gentype, ($gentype,), x) + @device_override Base.asinh(x::$gentype) = @builtin_ccall("asinh", $gentype, ($gentype,), x) + @device_function asinpi(x::$gentype) = @builtin_ccall("asinpi", $gentype, ($gentype,), x) -@device_override Base.atan(y_over_x::$gentype) = @builtin_ccall("atan", $gentype, ($gentype,), y_over_x) -@device_override Base.atan(y::$gentype, x::$gentype) = @builtin_ccall("atan2", $gentype, ($gentype, $gentype), y, x) -@device_override Base.atanh(x::$gentype) = @builtin_ccall("atanh", $gentype, ($gentype,), x) -@device_function atanpi(x::$gentype) = @builtin_ccall("atanpi", $gentype, ($gentype,), x) -@device_function atanpi(y::$gentype, x::$gentype) = @builtin_ccall("atan2pi", $gentype, ($gentype, $gentype), y, x) + @device_override Base.atan(y_over_x::$gentype) = @builtin_ccall("atan", $gentype, ($gentype,), y_over_x) + @device_override Base.atan(y::$gentype, x::$gentype) = @builtin_ccall("atan2", $gentype, ($gentype, $gentype), y, x) + @device_override Base.atanh(x::$gentype) = @builtin_ccall("atanh", $gentype, ($gentype,), x) + @device_function atanpi(x::$gentype) = @builtin_ccall("atanpi", $gentype, ($gentype,), x) + @device_function atanpi(y::$gentype, x::$gentype) = @builtin_ccall("atan2pi", $gentype, ($gentype, $gentype), y, x) -@device_override Base.cbrt(x::$gentype) = @builtin_ccall("cbrt", $gentype, ($gentype,), x) + @device_override Base.cbrt(x::$gentype) = @builtin_ccall("cbrt", $gentype, ($gentype,), x) -@device_override Base.ceil(x::$gentype) = @builtin_ccall("ceil", $gentype, ($gentype,), x) + @device_override Base.ceil(x::$gentype) = @builtin_ccall("ceil", $gentype, ($gentype,), x) -@device_override Base.copysign(x::$gentype, y::$gentype) = @builtin_ccall("copysign", $gentype, ($gentype, $gentype), x, y) + @device_override Base.copysign(x::$gentype, y::$gentype) = @builtin_ccall("copysign", $gentype, ($gentype, $gentype), x, y) -@device_override Base.cos(x::$gentype) = @builtin_ccall("cos", $gentype, ($gentype,), x) -@device_override Base.cosh(x::$gentype) = @builtin_ccall("cosh", $gentype, ($gentype,), x) -@device_function cospi(x::$gentype) = @builtin_ccall("cospi", $gentype, ($gentype,), x) + @device_override Base.cos(x::$gentype) = @builtin_ccall("cos", $gentype, ($gentype,), x) + @device_override Base.cosh(x::$gentype) = @builtin_ccall("cosh", $gentype, ($gentype,), x) + @device_function cospi(x::$gentype) = @builtin_ccall("cospi", $gentype, ($gentype,), x) -@device_override SpecialFunctions.erfc(x::$gentype) = @builtin_ccall("erfc", $gentype, ($gentype,), x) -@device_override SpecialFunctions.erf(x::$gentype) = @builtin_ccall("erf", $gentype, ($gentype,), x) + @device_override SpecialFunctions.erfc(x::$gentype) = @builtin_ccall("erfc", $gentype, ($gentype,), x) + @device_override SpecialFunctions.erf(x::$gentype) = @builtin_ccall("erf", $gentype, ($gentype,), x) -@device_override Base.exp(x::$gentype) = @builtin_ccall("exp", $gentype, ($gentype,), x) -@device_override Base.exp2(x::$gentype) = @builtin_ccall("exp2", $gentype, ($gentype,), x) -@device_override Base.exp10(x::$gentype) = @builtin_ccall("exp10", $gentype, ($gentype,), x) -@device_override Base.expm1(x::$gentype) = @builtin_ccall("expm1", $gentype, ($gentype,), x) + @device_override Base.exp(x::$gentype) = @builtin_ccall("exp", $gentype, ($gentype,), x) + @device_override Base.exp2(x::$gentype) = @builtin_ccall("exp2", $gentype, ($gentype,), x) + @device_override Base.exp10(x::$gentype) = @builtin_ccall("exp10", $gentype, ($gentype,), x) + @device_override Base.expm1(x::$gentype) = @builtin_ccall("expm1", $gentype, ($gentype,), x) -@device_override Base.abs(x::$gentype) = @builtin_ccall("fabs", $gentype, ($gentype,), x) + @device_override Base.abs(x::$gentype) = @builtin_ccall("fabs", $gentype, ($gentype,), x) -@device_function dim(x::$gentype, y::$gentype) = @builtin_ccall("fdim", $gentype, ($gentype, $gentype), x, y) + @device_function dim(x::$gentype, y::$gentype) = @builtin_ccall("fdim", $gentype, ($gentype, $gentype), x, y) -@device_override Base.floor(x::$gentype) = @builtin_ccall("floor", $gentype, ($gentype,), x) + @device_override Base.floor(x::$gentype) = @builtin_ccall("floor", $gentype, ($gentype,), x) -@device_override Base.fma(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("fma", $gentype, ($gentype, $gentype, $gentype), a, b, c) + @device_override Base.fma(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("fma", $gentype, ($gentype, $gentype, $gentype), a, b, c) -@device_override Base.max(x::$gentype, y::$gentype) = @builtin_ccall("fmax", $gentype, ($gentype, $gentype), x, y) + @device_override Base.max(x::$gentype, y::$gentype) = @builtin_ccall("fmax", $gentype, ($gentype, $gentype), x, y) -@device_override Base.min(x::$gentype, y::$gentype) = @builtin_ccall("fmin", $gentype, ($gentype, $gentype), x, y) + @device_override Base.min(x::$gentype, y::$gentype) = @builtin_ccall("fmin", $gentype, ($gentype, $gentype), x, y) -# NOTE: Julia's mod behaves differently than fmod -#@device_override Base.mod(x::$gentype, y::$gentype) = @builtin_ccall("fmod", $gentype, ($gentype, $gentype), x, y) -# fract(x::$gentype, $gentype *iptr) = @builtin_ccall("fract", $gentype, ($gentype, $gentype *), x, iptr) + # NOTE: Julia's mod behaves differently than fmod + #@device_override Base.mod(x::$gentype, y::$gentype) = @builtin_ccall("fmod", $gentype, ($gentype, $gentype), x, y) + # fract(x::$gentype, $gentype *iptr) = @builtin_ccall("fract", $gentype, ($gentype, $gentype *), x, iptr) -@device_override Base.hypot(x::$gentype, y::$gentype) = @builtin_ccall("hypot", $gentype, ($gentype, $gentype), x, y) + @device_override Base.hypot(x::$gentype, y::$gentype) = @builtin_ccall("hypot", $gentype, ($gentype, $gentype), x, y) -@device_override SpecialFunctions.loggamma(x::$gentype) = @builtin_ccall("lgamma", $gentype, ($gentype,), x) + @device_override SpecialFunctions.loggamma(x::$gentype) = @builtin_ccall("lgamma", $gentype, ($gentype,), x) -@device_override Base.log(x::$gentype) = @builtin_ccall("log", $gentype, ($gentype,), x) -@device_override Base.log2(x::$gentype) = @builtin_ccall("log2", $gentype, ($gentype,), x) -@device_override Base.log10(x::$gentype) = @builtin_ccall("log10", $gentype, ($gentype,), x) -@device_override Base.log1p(x::$gentype) = @builtin_ccall("log1p", $gentype, ($gentype,), x) -@device_function logb(x::$gentype) = @builtin_ccall("logb", $gentype, ($gentype,), x) + @device_override Base.log(x::$gentype) = @builtin_ccall("log", $gentype, ($gentype,), x) + @device_override Base.log2(x::$gentype) = @builtin_ccall("log2", $gentype, ($gentype,), x) + @device_override Base.log10(x::$gentype) = @builtin_ccall("log10", $gentype, ($gentype,), x) + @device_override Base.log1p(x::$gentype) = @builtin_ccall("log1p", $gentype, ($gentype,), x) + @device_function logb(x::$gentype) = @builtin_ccall("logb", $gentype, ($gentype,), x) -@device_function mad(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad", $gentype, ($gentype, $gentype, $gentype), a, b, c) + @device_function mad(a::$gentype, b::$gentype, c::$gentype) = @builtin_ccall("mad", $gentype, ($gentype, $gentype, $gentype), a, b, c) -@device_function maxmag(x::$gentype, y::$gentype) = @builtin_ccall("maxmag", $gentype, ($gentype, $gentype), x, y) -@device_function minmag(x::$gentype, y::$gentype) = @builtin_ccall("minmag", $gentype, ($gentype, $gentype), x, y) + @device_function maxmag(x::$gentype, y::$gentype) = @builtin_ccall("maxmag", $gentype, ($gentype, $gentype), x, y) + @device_function minmag(x::$gentype, y::$gentype) = @builtin_ccall("minmag", $gentype, ($gentype, $gentype), x, y) -# modf(x::$gentype, $gentype *iptr) = @builtin_ccall("modf", $gentype, ($gentype, $gentype *), x, iptr) + # modf(x::$gentype, $gentype *iptr) = @builtin_ccall("modf", $gentype, ($gentype, $gentype *), x, iptr) -@device_function nextafter(x::$gentype, y::$gentype) = @builtin_ccall("nextafter", $gentype, ($gentype, $gentype), x, y) + @device_function nextafter(x::$gentype, y::$gentype) = @builtin_ccall("nextafter", $gentype, ($gentype, $gentype), x, y) -@device_override Base.:(^)(x::$gentype, y::$gentype) = @builtin_ccall("pow", $gentype, ($gentype, $gentype), x, y) -@device_function powr(x::$gentype, y::$gentype) = @builtin_ccall("powr", $gentype, ($gentype, $gentype), x, y) + @device_override Base.:(^)(x::$gentype, y::$gentype) = @builtin_ccall("pow", $gentype, ($gentype, $gentype), x, y) + @device_function powr(x::$gentype, y::$gentype) = @builtin_ccall("powr", $gentype, ($gentype, $gentype), x, y) -@device_override Base.rem(x::$gentype, y::$gentype) = @builtin_ccall("remainder", $gentype, ($gentype, $gentype), x, y) + @device_override Base.rem(x::$gentype, y::$gentype) = @builtin_ccall("remainder", $gentype, ($gentype, $gentype), x, y) -@device_function rint(x::$gentype) = @builtin_ccall("rint", $gentype, ($gentype,), x) + @device_function rint(x::$gentype) = @builtin_ccall("rint", $gentype, ($gentype,), x) -@device_override Base.round(x::$gentype) = @builtin_ccall("round", $gentype, ($gentype,), x) + @device_override Base.round(x::$gentype) = @builtin_ccall("round", $gentype, ($gentype,), x) -@device_function rsqrt(x::$gentype) = @builtin_ccall("rsqrt", $gentype, ($gentype,), x) + @device_function rsqrt(x::$gentype) = @builtin_ccall("rsqrt", $gentype, ($gentype,), x) -@device_override Base.sin(x::$gentype) = @builtin_ccall("sin", $gentype, ($gentype,), x) -@device_override function Base.sincos(x::$gentype) - cosval = Ref{$gentype}() - sinval = GC.@preserve cosval begin - ptr = Base.unsafe_convert(Ptr{$gentype}, cosval) - llvm_ptr = reinterpret(LLVMPtr{$gentype, AS.Private}, ptr) - @builtin_ccall("sincos", $gentype, ($gentype, LLVMPtr{$gentype, AS.Private}), x, llvm_ptr) - end - return sinval, cosval[] -end -@device_override Base.sinh(x::$gentype) = @builtin_ccall("sinh", $gentype, ($gentype,), x) -@device_function sinpi(x::$gentype) = @builtin_ccall("sinpi", $gentype, ($gentype,), x) + @device_override Base.sin(x::$gentype) = @builtin_ccall("sin", $gentype, ($gentype,), x) + @device_override function Base.sincos(x::$gentype) + cosval = Ref{$gentype}() + sinval = GC.@preserve cosval begin + ptr = Base.unsafe_convert(Ptr{$gentype}, cosval) + llvm_ptr = reinterpret(LLVMPtr{$gentype, AS.Private}, ptr) + @builtin_ccall("sincos", $gentype, ($gentype, LLVMPtr{$gentype, AS.Private}), x, llvm_ptr) + end + return sinval, cosval[] + end + @device_override Base.sinh(x::$gentype) = @builtin_ccall("sinh", $gentype, ($gentype,), x) + @device_function sinpi(x::$gentype) = @builtin_ccall("sinpi", $gentype, ($gentype,), x) -@device_override Base.sqrt(x::$gentype) = @builtin_ccall("sqrt", $gentype, ($gentype,), x) + @device_override Base.sqrt(x::$gentype) = @builtin_ccall("sqrt", $gentype, ($gentype,), x) -@device_override Base.tan(x::$gentype) = @builtin_ccall("tan", $gentype, ($gentype,), x) -@device_override Base.tanh(x::$gentype) = @builtin_ccall("tanh", $gentype, ($gentype,), x) -@device_function tanpi(x::$gentype) = @builtin_ccall("tanpi", $gentype, ($gentype,), x) + @device_override Base.tan(x::$gentype) = @builtin_ccall("tan", $gentype, ($gentype,), x) + @device_override Base.tanh(x::$gentype) = @builtin_ccall("tanh", $gentype, ($gentype,), x) + @device_function tanpi(x::$gentype) = @builtin_ccall("tanpi", $gentype, ($gentype,), x) -@device_override SpecialFunctions.gamma(x::$gentype) = @builtin_ccall("tgamma", $gentype, ($gentype,), x) + @device_override SpecialFunctions.gamma(x::$gentype) = @builtin_ccall("tgamma", $gentype, ($gentype,), x) -@device_override Base.trunc(x::$gentype) = @builtin_ccall("trunc", $gentype, ($gentype,), x) + @device_override Base.trunc(x::$gentype) = @builtin_ccall("trunc", $gentype, ($gentype,), x) -end + end end @@ -120,12 +120,12 @@ end for gentypef in generic_types_float -if gentypef !== Float32 -@eval begin -@device_override Base.max(x::$gentypef, y::Float32) = @builtin_ccall("fmax", $gentypef, ($gentypef, Float32), x, y) -@device_override Base.min(x::$gentypef, y::Float32) = @builtin_ccall("fmin", $gentypef, ($gentypef, Float32), x, y) -end -end + if gentypef !== Float32 + @eval begin + @device_override Base.max(x::$gentypef, y::Float32) = @builtin_ccall("fmax", $gentypef, ($gentypef, Float32), x, y) + @device_override Base.min(x::$gentypef, y::Float32) = @builtin_ccall("fmin", $gentypef, ($gentypef, Float32), x, y) + end + end end @@ -134,12 +134,12 @@ end for gentyped in generic_types_double -if gentyped !== Float64 -@eval begin -@device_override Base.min(x::$gentyped, y::Float64) = @builtin_ccall("fmin", $gentyped, ($gentyped, Float64), x, y) -@device_override Base.max(x::$gentyped, y::Float64) = @builtin_ccall("fmax", $gentyped, ($gentyped, Float64), x, y) -end -end + if gentyped !== Float64 + @eval begin + @device_override Base.min(x::$gentyped, y::Float64) = @builtin_ccall("fmin", $gentyped, ($gentyped, Float64), x, y) + @device_override Base.max(x::$gentyped, y::Float64) = @builtin_ccall("fmax", $gentyped, ($gentyped, Float64), x, y) + end + end end @@ -196,19 +196,19 @@ function _mulhi(a::Int64, b::Int64) mask = typemax(UInt32) a1, a2 = (a >> shift), a & mask b1, b2 = (b >> shift), b & mask - a1b1, a1b2, a2b1 = a1*b1, a1*b2, a2*b1 + a1b1, a1b2, a2b1 = a1 * b1, a1 * b2, a2 * b1 t1 = a1b2 + _mulhi(a2 % UInt32, b2 % UInt32) t2 = a2b1 + (t1 & mask) - a1b1 + (t1 >> shift) + (t2 >> shift) + return a1b1 + (t1 >> shift) + (t2 >> shift) end @static if isdefined(Base.MultiplicativeInverses, :_mul_high) - _mulhi(a::T, b::T) where {T<:Union{Signed, Unsigned}} = Base.MultiplicativeInverses._mul_high(a, b) + _mulhi(a::T, b::T) where {T <: Union{Signed, Unsigned}} = Base.MultiplicativeInverses._mul_high(a, b) @device_override Base.MultiplicativeInverses._mul_high(a::Int64, b::Int64) = _mulhi(a, b) else - _mulhi(a::T, b::T) where {T<:Union{Signed, Unsigned}} = ((widen(a)*b) >>> (sizeof(a)*8)) % T + _mulhi(a::T, b::T) where {T <: Union{Signed, Unsigned}} = ((widen(a) * b) >>> (sizeof(a) * 8)) % T @device_override function Base.div(a::Int64, b::Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}) x = _mulhi(a, b.multiplier) - x += (a*b.addmul) % Int64 - ifelse(abs(b.divisor) == 1, a*b.divisor, (signbit(x) + (x >> b.shift)) % Int64) + x += (a * b.addmul) % Int64 + ifelse(abs(b.divisor) == 1, a * b.divisor, (signbit(x) + (x >> b.shift)) % Int64) end end diff --git a/lib/intrinsics/src/memory.jl b/lib/intrinsics/src/memory.jl index d5ffe260..62d2d7c5 100644 --- a/lib/intrinsics/src/memory.jl +++ b/lib/intrinsics/src/memory.jl @@ -1,11 +1,11 @@ # local memory # get a pointer to local memory, with known (static) or zero length (dynamic) -@generated function emit_localmemory(::Type{T}, ::Val{len}=Val(0)) where {T,len} - Context() do ctx +@generated function emit_localmemory(::Type{T}, ::Val{len} = Val(0)) where {T, len} + return Context() do ctx # XXX: as long as LLVMPtr is emitted as i8*, it doesn't make sense to type the GV eltyp = convert(LLVMType, LLVM.Int8Type()) - T_ptr = convert(LLVMType, LLVMPtr{T,AS.Local}) + T_ptr = convert(LLVMType, LLVMPtr{T, AS.Local}) # create a function llvm_f, _ = create_function(T_ptr) @@ -33,6 +33,6 @@ ret!(builder, untyped_ptr) end - call_function(llvm_f, LLVMPtr{T,AS.Local}) + call_function(llvm_f, LLVMPtr{T, AS.Local}) end end diff --git a/lib/intrinsics/src/pointer.jl b/lib/intrinsics/src/pointer.jl index 228740e5..cb8a8122 100644 --- a/lib/intrinsics/src/pointer.jl +++ b/lib/intrinsics/src/pointer.jl @@ -4,13 +4,13 @@ export AS module AS -const Private = 0 -const Global = 1 -const Constant = 2 -const Local = 3 -const Generic = 4 -const Input = 5 -const Output = 6 -const Count = 7 + const Private = 0 + const Global = 1 + const Constant = 2 + const Local = 3 + const Generic = 4 + const Input = 5 + const Output = 6 + const Count = 7 end diff --git a/lib/intrinsics/src/printf.jl b/lib/intrinsics/src/printf.jl index b9e917d7..5f9d536b 100644 --- a/lib/intrinsics/src/printf.jl +++ b/lib/intrinsics/src/printf.jl @@ -25,10 +25,10 @@ macro printf(fmt::String, args...) end @generated function emit_printf(::Val{fmt}, argspec...) where {fmt} - arg_exprs = [:( argspec[$i] ) for i in 1:length(argspec)] + arg_exprs = [:(argspec[$i]) for i in 1:length(argspec)] arg_types = [argspec...] - Context() do ctx + return Context() do ctx T_void = LLVM.VoidType() T_int32 = LLVM.Int32Type() T_pint8 = LLVM.PointerType(LLVM.Int8Type(), AS.Constant) @@ -43,10 +43,10 @@ end entry = BasicBlock(llvm_f, "entry") position!(builder, entry) - str = globalstring_ptr!(builder, String(fmt); addrspace=AS.Constant) + str = globalstring_ptr!(builder, String(fmt); addrspace = AS.Constant) # invoke printf and return - printf_typ = LLVM.FunctionType(T_int32, [T_pint8]; vararg=true) + printf_typ = LLVM.FunctionType(T_int32, [T_pint8]; vararg = true) printf = LLVM.Function(mod, "printf", printf_typ) push!(function_attributes(printf), EnumAttribute("nobuiltin")) chars = call!(builder, printf_typ, printf, [str, parameters(llvm_f)...]) @@ -64,27 +64,27 @@ end # simple conversions, defining an expression and the resulting argument type. nothing fancy, # `@print` pretty directly maps to `@printf`; we should just support `write(::IO)`. const print_conversions = Dict( - Float32 => (x->:(Float64($x)), Float64), - Ptr{<:Any} => (x->:(convert(Ptr{Cvoid}, $x)), Ptr{Cvoid}), - Bool => (x->:(Int32($x)), Int32), + Float32 => (x -> :(Float64($x)), Float64), + Ptr{<:Any} => (x -> :(convert(Ptr{Cvoid}, $x)), Ptr{Cvoid}), + Bool => (x -> :(Int32($x)), Int32), ) # format specifiers const print_specifiers = Dict( # integers - Int16 => "%hd", - Int32 => "%d", - Int64 => Sys.iswindows() ? "%lld" : "%ld", - UInt16 => "%hu", - UInt32 => "%u", - UInt64 => Sys.iswindows() ? "%llu" : "%lu", + Int16 => "%hd", + Int32 => "%d", + Int64 => Sys.iswindows() ? "%lld" : "%ld", + UInt16 => "%hu", + UInt32 => "%u", + UInt64 => Sys.iswindows() ? "%llu" : "%lu", # floating-point - Float64 => "%f", + Float64 => "%f", # other - Cchar => "%c", - Ptr{Cvoid} => "%p", + Cchar => "%c", + Ptr{Cvoid} => "%p", ) @generated function _print(parts...) @@ -123,7 +123,7 @@ const print_specifiers = Dict( end end - quote + return quote Base.@_inline_meta @printf($fmt, $(args...)) end @@ -147,7 +147,7 @@ Limited string interpolation is also possible: ``` """ macro print(parts...) - args = Union{Val,Expr,Symbol}[] + args = Union{Val, Expr, Symbol}[] parts = [parts...] while true @@ -173,16 +173,18 @@ macro print(parts...) end end - quote + return quote _print($(map(esc, args)...)) end end @doc (@doc @print) -> macro println(parts...) - esc(quote - $SPIRVIntrinsics.@print($(parts...), "\n") - end) + return esc( + quote + $SPIRVIntrinsics.@print($(parts...), "\n") + end + ) end """ @@ -197,9 +199,17 @@ GPU analog of `Base.@show`. It comes with the same type restrictions as [`@print macro show(exs...) blk = Expr(:block) for ex in exs - push!(blk.args, :($SPIRVIntrinsics.@println($(sprint(Base.show_unquoted,ex)*" = "), - begin local value = $(esc(ex)) end))) + push!( + blk.args, :( + $SPIRVIntrinsics.@println( + $(sprint(Base.show_unquoted, ex) * " = "), + begin + local value = $(esc(ex)) + end + ) + ) + ) end isempty(exs) || push!(blk.args, :value) - blk + return blk end diff --git a/lib/intrinsics/src/synchronization.jl b/lib/intrinsics/src/synchronization.jl index 0d6c4138..55181cb6 100644 --- a/lib/intrinsics/src/synchronization.jl +++ b/lib/intrinsics/src/synchronization.jl @@ -7,7 +7,9 @@ const CLK_LOCAL_MEM_FENCE = cl_mem_fence_flags(1) const CLK_GLOBAL_MEM_FENCE = cl_mem_fence_flags(2) #barrier(flags=0) = @builtin_ccall("barrier", Cvoid, (UInt32,), flags) -@device_function barrier(flags=0) = Base.llvmcall((""" +@device_function barrier(flags = 0) = Base.llvmcall( + ( + """ declare void @_Z7barrierj(i32) #0 define void @entry(i32 %0) #1 { call void @_Z7barrierj(i32 %0) @@ -15,7 +17,9 @@ const CLK_GLOBAL_MEM_FENCE = cl_mem_fence_flags(2) } attributes #0 = { convergent } attributes #1 = { alwaysinline } - """, "entry"), - Cvoid, Tuple{Int32}, convert(Int32, flags)) + """, "entry", + ), + Cvoid, Tuple{Int32}, convert(Int32, flags) +) push!(opencl_builtins, "_Z7barrierj") # TODO: add support for attributes to @builting_ccall/LLVM.@typed_ccall diff --git a/lib/intrinsics/src/utils.jl b/lib/intrinsics/src/utils.jl index b6477ad2..499e8ada 100644 --- a/lib/intrinsics/src/utils.jl +++ b/lib/intrinsics/src/utils.jl @@ -10,7 +10,7 @@ macro builtin_ccall(name, ret, argtypes, args...) argtypes = argtypes.args function mangle(T::Type) - if T == Int32 + return if T == Int32 "i" elseif T == UInt32 "j" @@ -36,10 +36,10 @@ macro builtin_ccall(name, ret, argtypes, args...) # mangle address space ASstr = if as == AS.Global "CLglobal" - #elseif as == AS.Global_device - # "CLdevice" - #elseif as == AS.Global_host - # "CLhost" + #elseif as == AS.Global_device + # "CLdevice" + #elseif as == AS.Global_host + # "CLhost" elseif as == AS.Local "CLlocal" elseif as == AS.Constant @@ -72,9 +72,11 @@ macro builtin_ccall(name, ret, argtypes, args...) end push!(opencl_builtins, mangled) - esc(quote - @typed_ccall($mangled, llvmcall, $ret, ($(argtypes...),), $(args...)) - end) + return esc( + quote + @typed_ccall($mangled, llvmcall, $ret, ($(argtypes...),), $(args...)) + end + ) end @@ -84,9 +86,11 @@ end Base.Experimental.@MethodTable(method_table) macro device_override(ex) - esc(quote - Base.Experimental.@overlay($method_table, $ex) - end) + return esc( + quote + Base.Experimental.@overlay($method_table, $ex) + end + ) end macro device_function(ex) @@ -98,8 +102,10 @@ macro device_function(ex) error("This function is not intended for use on the CPU") end - esc(quote - $(ExprTools.combinedef(def)) - @device_override $ex - end) + return esc( + quote + $(ExprTools.combinedef(def)) + @device_override $ex + end + ) end diff --git a/lib/intrinsics/src/work_item.jl b/lib/intrinsics/src/work_item.jl index ee3cf73e..4db9818a 100644 --- a/lib/intrinsics/src/work_item.jl +++ b/lib/intrinsics/src/work_item.jl @@ -1,28 +1,28 @@ # Work-Item Functions export get_work_dim, - get_global_size, get_global_id, - get_local_size, get_enqueued_local_size, get_local_id, - get_num_groups, get_group_id, - get_global_offset, - get_global_linear_id, get_local_linear_id + get_global_size, get_global_id, + get_local_size, get_enqueued_local_size, get_local_id, + get_num_groups, get_group_id, + get_global_offset, + get_global_linear_id, get_local_linear_id # NOTE: these functions now unsafely truncate to Int to avoid top bit checks. # we should probably use range metadata instead. @device_function get_work_dim() = @builtin_ccall("get_work_dim", UInt32, ()) % Int -@device_function get_global_size(dimindx::Integer=1) = @builtin_ccall("get_global_size", UInt, (UInt32,), dimindx-1) % Int -@device_function get_global_id(dimindx::Integer=1) = @builtin_ccall("get_global_id", UInt, (UInt32,), dimindx-1) % Int + 1 +@device_function get_global_size(dimindx::Integer = 1) = @builtin_ccall("get_global_size", UInt, (UInt32,), dimindx - 1) % Int +@device_function get_global_id(dimindx::Integer = 1) = @builtin_ccall("get_global_id", UInt, (UInt32,), dimindx - 1) % Int + 1 -@device_function get_local_size(dimindx::Integer=1) = @builtin_ccall("get_local_size", UInt, (UInt32,), dimindx-1) % Int -@device_function get_enqueued_local_size(dimindx::Integer=1) = @builtin_ccall("get_enqueued_local_size", UInt, (UInt32,), dimindx-1) % Int -@device_function get_local_id(dimindx::Integer=1) = @builtin_ccall("get_local_id", UInt, (UInt32,), dimindx-1) % Int + 1 +@device_function get_local_size(dimindx::Integer = 1) = @builtin_ccall("get_local_size", UInt, (UInt32,), dimindx - 1) % Int +@device_function get_enqueued_local_size(dimindx::Integer = 1) = @builtin_ccall("get_enqueued_local_size", UInt, (UInt32,), dimindx - 1) % Int +@device_function get_local_id(dimindx::Integer = 1) = @builtin_ccall("get_local_id", UInt, (UInt32,), dimindx - 1) % Int + 1 -@device_function get_num_groups(dimindx::Integer=1) = @builtin_ccall("get_num_groups", UInt, (UInt32,), dimindx-1) % Int -@device_function get_group_id(dimindx::Integer=1) = @builtin_ccall("get_group_id", UInt, (UInt32,), dimindx-1) % Int + 1 +@device_function get_num_groups(dimindx::Integer = 1) = @builtin_ccall("get_num_groups", UInt, (UInt32,), dimindx - 1) % Int +@device_function get_group_id(dimindx::Integer = 1) = @builtin_ccall("get_group_id", UInt, (UInt32,), dimindx - 1) % Int + 1 -@device_function get_global_offset(dimindx::Integer=1) = @builtin_ccall("get_global_offset", UInt, (UInt32,), dimindx-1) % Int + 1 +@device_function get_global_offset(dimindx::Integer = 1) = @builtin_ccall("get_global_offset", UInt, (UInt32,), dimindx - 1) % Int + 1 @device_function get_global_linear_id() = @builtin_ccall("get_global_linear_id", UInt, ()) % Int + 1 @device_function get_local_linear_id() = @builtin_ccall("get_local_linear_id", UInt, ()) % Int + 1 diff --git a/res/opencl_prologue.jl b/res/opencl_prologue.jl index 33f8536e..4d9bbf26 100644 --- a/res/opencl_prologue.jl +++ b/res/opencl_prologue.jl @@ -4,9 +4,11 @@ end function check(f) - res = retry_reclaim(err -> err == CL_OUT_OF_RESOURCES || - err == CL_MEM_OBJECT_ALLOCATION_FAILURE || - err == CL_OUT_OF_HOST_MEMORY) do + res = retry_reclaim( + err -> err == CL_OUT_OF_RESOURCES || + err == CL_MEM_OBJECT_ALLOCATION_FAILURE || + err == CL_OUT_OF_HOST_MEMORY + ) do f() end diff --git a/res/wrap.jl b/res/wrap.jl index dbc286f0..4c9401f3 100644 --- a/res/wrap.jl +++ b/res/wrap.jl @@ -10,7 +10,7 @@ using Clang.Generators using JuliaFormatter -function wrap(name, headers...; defines=[], include_dirs=[], dependents=true) +function wrap(name, headers...; defines = [], include_dirs = [], dependents = true) @info "Wrapping $name" args = get_default_args() @@ -37,7 +37,7 @@ function wrap(name, headers...; defines=[], include_dirs=[], dependents=true) # (i.e., not from included ones) if !dependents function rewrite!(dag::ExprDAG) - replace!(get_nodes(dag)) do node + return replace!(get_nodes(dag)) do node path = normpath(Clang.get_filename(node.cursor)) if !in(path, headers) return ExprNode(node.id, Generators.Skip(), node.cursor, Expr[], node.adj) @@ -88,6 +88,7 @@ function rewriter!(ctx, options) end end end + return end @@ -103,8 +104,10 @@ function main() paths = map(headers) do header joinpath(include_dir, "CL", header) end - wrap("opencl", paths...; include_dirs=[include_dir], - defines=["CL_TARGET_OPENCL_VERSION" => "300"],) + return wrap( + "opencl", paths...; include_dirs = [include_dir], + defines = ["CL_TARGET_OPENCL_VERSION" => "300"], + ) end isinteractive() || main() diff --git a/src/OpenCLKernels.jl b/src/OpenCLKernels.jl index e0abd9f6..1a8e1ee7 100644 --- a/src/OpenCLKernels.jl +++ b/src/OpenCLKernels.jl @@ -17,9 +17,9 @@ export OpenCLBackend struct OpenCLBackend <: KA.GPU end -KA.allocate(::OpenCLBackend, ::Type{T}, dims::Tuple) where T = CLArray{T}(undef, dims) -KA.zeros(::OpenCLBackend, ::Type{T}, dims::Tuple) where T = OpenCL.zeros(T, dims) -KA.ones(::OpenCLBackend, ::Type{T}, dims::Tuple) where T = OpenCL.ones(T, dims) +KA.allocate(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T} = CLArray{T}(undef, dims) +KA.zeros(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T} = OpenCL.zeros(T, dims) +KA.ones(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T} = OpenCL.ones(T, dims) KA.get_backend(::CLArray) = OpenCLBackend() # TODO should be non-blocking @@ -34,7 +34,7 @@ Adapt.adapt_storage(::KA.CPU, a::CLArray) = convert(Array, a) ## Memory Operations function KA.copyto!(::OpenCLBackend, A, B) - copyto!(A, B) + return copyto!(A, B) # TODO: Address device to host copies in jl being synchronizing end @@ -42,11 +42,13 @@ end ## Kernel Launch function KA.mkcontext(kernel::KA.Kernel{OpenCLBackend}, _ndrange, iterspace) - KA.CompilerMetadata{KA.ndrange(kernel), KA.DynamicCheck}(_ndrange, iterspace) + return KA.CompilerMetadata{KA.ndrange(kernel), KA.DynamicCheck}(_ndrange, iterspace) end -function KA.mkcontext(kernel::KA.Kernel{OpenCLBackend}, I, _ndrange, iterspace, - ::Dynamic) where Dynamic - KA.CompilerMetadata{KA.ndrange(kernel), Dynamic}(I, _ndrange, iterspace) +function KA.mkcontext( + kernel::KA.Kernel{OpenCLBackend}, I, _ndrange, iterspace, + ::Dynamic + ) where {Dynamic} + return KA.CompilerMetadata{KA.ndrange(kernel), Dynamic}(I, _ndrange, iterspace) end function KA.launch_config(kernel::KA.Kernel{OpenCLBackend}, ndrange, workgroupsize) @@ -54,7 +56,7 @@ function KA.launch_config(kernel::KA.Kernel{OpenCLBackend}, ndrange, workgroupsi ndrange = (ndrange,) end if workgroupsize isa Integer - workgroupsize = (workgroupsize, ) + workgroupsize = (workgroupsize,) end # partition checked that the ndrange's agreed @@ -63,7 +65,7 @@ function KA.launch_config(kernel::KA.Kernel{OpenCLBackend}, ndrange, workgroupsi end iterspace, dynamic = if KA.workgroupsize(kernel) <: KA.DynamicSize && - workgroupsize === nothing + workgroupsize === nothing # use ndrange as preliminary workgroupsize for autotuning KA.partition(kernel, ndrange, ndrange) else @@ -82,13 +84,13 @@ function threads_to_workgroupsize(threads, ndrange) end end -function (obj::KA.Kernel{OpenCLBackend})(args...; ndrange=nothing, workgroupsize=nothing) +function (obj::KA.Kernel{OpenCLBackend})(args...; ndrange = nothing, workgroupsize = nothing) ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) # this might not be the final context, since we may tune the workgroupsize ctx = KA.mkcontext(obj, ndrange, iterspace) - kernel = @opencl launch=false obj.f(ctx, args...) + kernel = @opencl launch = false obj.f(ctx, args...) # figure out the optimal workgroupsize automatically if KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing diff --git a/src/array.jl b/src/array.jl index 31bf19a4..03489917 100644 --- a/src/array.jl +++ b/src/array.jl @@ -14,7 +14,7 @@ end function contains_eltype(T, X) if T === X - return true + return true elseif T isa Union for U in Base.uniontypes(T) contains_eltype(U, X) && return true @@ -28,44 +28,46 @@ function contains_eltype(T, X) end function check_eltype(T) - Base.allocatedinline(T) || error("CLArray only supports element types that are stored inline") - Base.isbitsunion(T) && error("CLArray does not yet support isbits-union arrays") - !("cl_khr_fp16" in cl.device().extensions) && contains_eltype(T, Float16) && error("Float16 is not supported on this device") - !("cl_khr_fp64" in cl.device().extensions) && contains_eltype(T, Float64) && error("Float16 is not supported on this device") + Base.allocatedinline(T) || error("CLArray only supports element types that are stored inline") + Base.isbitsunion(T) && error("CLArray does not yet support isbits-union arrays") + !("cl_khr_fp16" in cl.device().extensions) && contains_eltype(T, Float16) && error("Float16 is not supported on this device") + return !("cl_khr_fp64" in cl.device().extensions) && contains_eltype(T, Float64) && error("Float16 is not supported on this device") end -mutable struct CLArray{T,N,M} <: AbstractGPUArray{T,N} - data::DataRef{Managed{M}} +mutable struct CLArray{T, N, M} <: AbstractGPUArray{T, N} + data::DataRef{Managed{M}} - maxsize::Int # maximum data size; excluding any selector bytes - offset::Int # offset of the data in memory, in number of elements + maxsize::Int # maximum data size; excluding any selector bytes + offset::Int # offset of the data in memory, in number of elements - dims::Dims{N} + dims::Dims{N} - function CLArray{T,N,M}(::UndefInitializer, dims::Dims{N}) where {T,N,M} - check_eltype(T) - maxsize = prod(dims) * sizeof(T) - bufsize = if Base.isbitsunion(T) - # type tag array past the data - maxsize + prod(dims) - else - maxsize - end + function CLArray{T, N, M}(::UndefInitializer, dims::Dims{N}) where {T, N, M} + check_eltype(T) + maxsize = prod(dims) * sizeof(T) + bufsize = if Base.isbitsunion(T) + # type tag array past the data + maxsize + prod(dims) + else + maxsize + end - GPUArrays.cached_alloc((CLArray, cl.device(), T, bufsize, M)) do - data = DataRef(managed -> release(managed.mem), Managed(allocate(M, cl.context(), cl.device(), bufsize, Base.datatype_alignment(T)))) - obj = new{T,N,M}(data, maxsize, 0, dims) - finalizer(unsafe_free!, obj) - return obj - end::CLArray{T, N, M} - end + return GPUArrays.cached_alloc((CLArray, cl.device(), T, bufsize, M)) do + data = DataRef(managed -> release(managed.mem), Managed(allocate(M, cl.context(), cl.device(), bufsize, Base.datatype_alignment(T)))) + obj = new{T, N, M}(data, maxsize, 0, dims) + finalizer(unsafe_free!, obj) + return obj + end::CLArray{T, N, M} + end - function CLArray{T,N}(data::DataRef{Managed{M}}, dims::Dims{N}; - maxsize::Int=prod(dims) * sizeof(T), offset::Int=0) where {T,N,M} - check_eltype(T) - obj = new{T,N,M}(data, maxsize, offset, dims) - finalizer(unsafe_free!, obj) - end + function CLArray{T, N}( + data::DataRef{Managed{M}}, dims::Dims{N}; + maxsize::Int = prod(dims) * sizeof(T), offset::Int = 0 + ) where {T, N, M} + check_eltype(T) + obj = new{T, N, M}(data, maxsize, offset, dims) + return finalizer(unsafe_free!, obj) + end end GPUArrays.storage(a::CLArray) = a.data @@ -78,76 +80,78 @@ Base.dataids(A::CLArray) = (UInt(pointer(A)),) Base.unaliascopy(A::CLArray) = copy(A) function Base.mightalias(A::CLArray, B::CLArray) - rA = pointer(A):pointer(A)+sizeof(A) - rB = pointer(B):pointer(B)+sizeof(B) - return first(rA) <= first(rB) < last(rA) || first(rB) <= first(rA) < last(rB) + rA = pointer(A):(pointer(A) + sizeof(A)) + rB = pointer(B):(pointer(B) + sizeof(B)) + return first(rA) <= first(rB) < last(rA) || first(rB) <= first(rA) < last(rB) end ## convenience constructors -const CLVector{T} = CLArray{T,1} -const CLMatrix{T} = CLArray{T,2} -const CLVecOrMat{T} = Union{CLVector{T},CLMatrix{T}} +const CLVector{T} = CLArray{T, 1} +const CLMatrix{T} = CLArray{T, 2} +const CLVecOrMat{T} = Union{CLVector{T}, CLMatrix{T}} # default to non-unified memory -CLArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N} = - CLArray{T,N,cl.DeviceBuffer}(undef, dims) +CLArray{T, N}(::UndefInitializer, dims::Dims{N}) where {T, N} = + CLArray{T, N, cl.DeviceBuffer}(undef, dims) # buffer, type and dimensionality specified -CLArray{T,N,M}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N,M} = - CLArray{T,N,M}(undef, convert(Tuple{Vararg{Int}}, dims)) -CLArray{T,N,M}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N,M} = - CLArray{T,N,M}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T, N, M}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N, M} = + CLArray{T, N, M}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T, N, M}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N, M} = + CLArray{T, N, M}(undef, convert(Tuple{Vararg{Int}}, dims)) # type and dimensionality specified -CLArray{T,N}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} = - CLArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) -CLArray{T,N}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} = - CLArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T, N}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N} = + CLArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T, N}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N} = + CLArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) # type but not dimensionality specified -CLArray{T}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} = - CLArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) -CLArray{T}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} = - CLArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N} = + CLArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) +CLArray{T}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N} = + CLArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) # empty vector constructor -CLArray{T,1,M}() where {T,M} = CLArray{T,1,M}(undef, 0) -CLArray{T,1}() where {T} = CLArray{T,1}(undef, 0) +CLArray{T, 1, M}() where {T, M} = CLArray{T, 1, M}(undef, 0) +CLArray{T, 1}() where {T} = CLArray{T, 1}(undef, 0) # do-block constructors -for (ctor, tvars) in (:CLArray => (), - :(CLArray{T}) => (:T,), - :(CLArray{T,N}) => (:T, :N), - :(CLArray{T,N,M}) => (:T, :N, :M)) - @eval begin - function $ctor(f::Function, args...) where {$(tvars...)} - xs = $ctor(args...) - try - f(xs) - finally - unsafe_free!(xs) - end +for (ctor, tvars) in ( + :CLArray => (), + :(CLArray{T}) => (:T,), + :(CLArray{T, N}) => (:T, :N), + :(CLArray{T, N, M}) => (:T, :N, :M), + ) + @eval begin + function $ctor(f::Function, args...) where {$(tvars...)} + xs = $ctor(args...) + return try + f(xs) + finally + unsafe_free!(xs) + end + end end - end end -Base.similar(a::CLArray{T,N,M}) where {T,N,M} = - CLArray{T,N,M}(undef, size(a)) -Base.similar(a::CLArray{T,<:Any,M}, dims::Base.Dims{N}) where {T,N,M} = - CLArray{T,N,M}(undef, dims) -Base.similar(a::CLArray{<:Any,<:Any,M}, ::Type{T}, dims::Base.Dims{N}) where {T,N,M} = - CLArray{T,N,M}(undef, dims) +Base.similar(a::CLArray{T, N, M}) where {T, N, M} = + CLArray{T, N, M}(undef, size(a)) +Base.similar(a::CLArray{T, <:Any, M}, dims::Base.Dims{N}) where {T, N, M} = + CLArray{T, N, M}(undef, dims) +Base.similar(a::CLArray{<:Any, <:Any, M}, ::Type{T}, dims::Base.Dims{N}) where {T, N, M} = + CLArray{T, N, M}(undef, dims) -function Base.copy(a::CLArray{T,N}) where {T,N} - b = similar(a) - @inbounds copyto!(b, a) +function Base.copy(a::CLArray{T, N}) where {T, N} + b = similar(a) + return @inbounds copyto!(b, a) end function Base.deepcopy_internal(x::CLArray, dict::IdDict) - haskey(dict, x) && return dict[x]::typeof(x) - return dict[x] = copy(x) + haskey(dict, x) && return dict[x]::typeof(x) + return dict[x] = copy(x) end @@ -314,7 +318,7 @@ context(A::CLArray) = cl.context(A.data[].mem) device(A::CLArray) = cl.device(A.data[].mem) buftype(x::CLArray) = buftype(typeof(x)) -buftype(::Type{<:CLArray{<:Any,<:Any,M}}) where {M} = @isdefined(M) ? M : Any +buftype(::Type{<:CLArray{<:Any, <:Any, M}}) where {M} = @isdefined(M) ? M : Any is_device(a::CLArray) = buftype(a) == cl.DeviceBuffer is_shared(a::CLArray) = buftype(a) == cl.SharedBuffer @@ -324,8 +328,8 @@ is_host(a::CLArray) = buftype(a) == cl.HostBuffer ## derived types export DenseCLArray, DenseCLVector, DenseCLMatrix, DenseCLVecOrMat, - StridedCLArray, StridedCLVector, StridedCLMatrix, StridedCLVecOrMat, - WrappedCLArray, WrappedCLVector, WrappedCLMatrix, WrappedCLVecOrMat + StridedCLArray, StridedCLVector, StridedCLMatrix, StridedCLVecOrMat, + WrappedCLArray, WrappedCLVector, WrappedCLMatrix, WrappedCLVecOrMat # dense arrays: stored contiguously in memory # @@ -333,62 +337,70 @@ export DenseCLArray, DenseCLVector, DenseCLMatrix, DenseCLVecOrMat, # this simplifies common use cases, and greatly improves load time. # cl.jl 2.0 experimented with using ReshapedArray/ReinterpretArray/SubArray, # but that proved much too costly. TODO: revisit when we have better Base support. -const DenseCLArray{T,N} = CLArray{T,N} -const DenseCLVector{T} = DenseCLArray{T,1} -const DenseCLMatrix{T} = DenseCLArray{T,2} +const DenseCLArray{T, N} = CLArray{T, N} +const DenseCLVector{T} = DenseCLArray{T, 1} +const DenseCLMatrix{T} = DenseCLArray{T, 2} const DenseCLVecOrMat{T} = Union{DenseCLVector{T}, DenseCLMatrix{T}} # XXX: these dummy aliases (DenseCLArray=CLArray) break alias printing, as # `Base.print_without_params` only handles the case of a single alias. # strided arrays -const StridedSubCLArray{T,N,I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange, - Base.AbstractCartesianIndex}}}} = - SubArray{T,N,<:CLArray,I} -const StridedCLArray{T,N} = Union{CLArray{T,N}, StridedSubCLArray{T,N}} -const StridedCLVector{T} = StridedCLArray{T,1} -const StridedCLMatrix{T} = StridedCLArray{T,2} +const StridedSubCLArray{ + T, N, I <: Tuple{ + Vararg{ + Union{ + Base.RangeIndex, Base.ReshapedUnitRange, + Base.AbstractCartesianIndex, + }, + }, + }, +} = + SubArray{T, N, <:CLArray, I} +const StridedCLArray{T, N} = Union{CLArray{T, N}, StridedSubCLArray{T, N}} +const StridedCLVector{T} = StridedCLArray{T, 1} +const StridedCLMatrix{T} = StridedCLArray{T, 2} const StridedCLVecOrMat{T} = Union{StridedCLVector{T}, StridedCLMatrix{T}} -@inline function Base.pointer(x::StridedCLArray{T}, i::Integer=1; type=cl.DeviceBuffer) where T +@inline function Base.pointer(x::StridedCLArray{T}, i::Integer = 1; type = cl.DeviceBuffer) where {T} PT = if type == cl.DeviceBuffer - CLPtr{T} + CLPtr{T} elseif type == cl.HostBuffer - Ptr{T} + Ptr{T} else - error("unknown memory type") + error("unknown memory type") end - Base.unsafe_convert(PT, x) + Base._memory_offset(x, i) + return Base.unsafe_convert(PT, x) + Base._memory_offset(x, i) end # anything that's (secretly) backed by a CLArray -const WrappedCLArray{T,N} = Union{CLArray{T,N}, WrappedArray{T,N,CLArray,CLArray{T,N}}} -const WrappedCLVector{T} = WrappedCLArray{T,1} -const WrappedCLMatrix{T} = WrappedCLArray{T,2} +const WrappedCLArray{T, N} = Union{CLArray{T, N}, WrappedArray{T, N, CLArray, CLArray{T, N}}} +const WrappedCLVector{T} = WrappedCLArray{T, 1} +const WrappedCLMatrix{T} = WrappedCLArray{T, 2} const WrappedCLVecOrMat{T} = Union{WrappedCLVector{T}, WrappedCLMatrix{T}} ## interop with other arrays -@inline function CLArray{T,N,B}(xs::AbstractArray{<:Any,N}) where {T,N,B} - A = CLArray{T,N,B}(undef, size(xs)) - copyto!(A, convert(Array{T}, xs)) - return A +@inline function CLArray{T, N, B}(xs::AbstractArray{<:Any, N}) where {T, N, B} + A = CLArray{T, N, B}(undef, size(xs)) + copyto!(A, convert(Array{T}, xs)) + return A end -@inline CLArray{T,N}(xs::AbstractArray{<:Any,N}) where {T,N} = - CLArray{T,N,cl.DeviceBuffer}(xs) +@inline CLArray{T, N}(xs::AbstractArray{<:Any, N}) where {T, N} = + CLArray{T, N, cl.DeviceBuffer}(xs) -@inline CLArray{T,N}(xs::CLArray{<:Any,N,B}) where {T,N,B} = - CLArray{T,N,B}(xs) +@inline CLArray{T, N}(xs::CLArray{<:Any, N, B}) where {T, N, B} = + CLArray{T, N, B}(xs) # underspecified constructors -CLArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = CLArray{T,N}(xs) -(::Type{CLArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = CLArray{S,N}(x) -CLArray(A::AbstractArray{T,N}) where {T,N} = CLArray{T,N}(A) +CLArray{T}(xs::AbstractArray{S, N}) where {T, N, S} = CLArray{T, N}(xs) +(::Type{CLArray{T, N} where {T}})(x::AbstractArray{S, N}) where {S, N} = CLArray{S, N}(x) +CLArray(A::AbstractArray{T, N}) where {T, N} = CLArray{T, N}(A) # idempotency -CLArray{T,N,B}(xs::CLArray{T,N,B}) where {T,N,B} = xs -CLArray{T,N}(xs::CLArray{T,N,B}) where {T,N,B} = xs +CLArray{T, N, B}(xs::CLArray{T, N, B}) where {T, N, B} = xs +CLArray{T, N}(xs::CLArray{T, N, B}) where {T, N, B} = xs # Level CLro references cl.CLRef(x::Any) = cl.CLRefArray(CLArray([x])) @@ -397,7 +409,7 @@ cl.CLRef{T}() where {T} = cl.CLRefArray(CLArray{T}(undef, 1)) ## conversions -Base.convert(::Type{T}, x::T) where T <: CLArray = x +Base.convert(::Type{T}, x::T) where {T <: CLArray} = x #= # defer the conversion to Managed, where we handle memory consistency # XXX: conversion to Buffer or Managed memory by cconvert? @@ -409,83 +421,91 @@ Base.unsafe_convert(typ::Type{CLPtr{T}}, x::CLArray{T}) where {T} = ## indexing -function Base.getindex(x::CLArray{<:Any, <:Any, <:Union{cl.HostBuffer,cl.SharedBuffer}}, I::Int) - @boundscheck checkbounds(x, I) - unsafe_load(pointer(x, I; type=cl.HostBuffer)) +function Base.getindex(x::CLArray{<:Any, <:Any, <:Union{cl.HostBuffer, cl.SharedBuffer}}, I::Int) + @boundscheck checkbounds(x, I) + return unsafe_load(pointer(x, I; type = cl.HostBuffer)) end -function Base.setindex!(x::CLArray{<:Any, <:Any, <:Union{cl.HostBuffer,cl.SharedBuffer}}, v, I::Int) - @boundscheck checkbounds(x, I) - unsafe_store!(pointer(x, I; type=cl.HostBuffer), v) +function Base.setindex!(x::CLArray{<:Any, <:Any, <:Union{cl.HostBuffer, cl.SharedBuffer}}, v, I::Int) + @boundscheck checkbounds(x, I) + return unsafe_store!(pointer(x, I; type = cl.HostBuffer), v) end ## interop with libraries function Base.unsafe_convert(::Type{Ptr{T}}, x::CLArray{T}) where {T} - buf = x.data[] - if is_device(x) - throw(ArgumentError("cannot take the CPU address of a $(typeof(x))")) - end - convert(Ptr{T}, x.data[]) + x.offset*Base.elsize(x) + buf = x.data[] + if is_device(x) + throw(ArgumentError("cannot take the CPU address of a $(typeof(x))")) + end + return convert(Ptr{T}, x.data[]) + x.offset * Base.elsize(x) end function Base.unsafe_convert(::Type{CLPtr{T}}, x::CLArray{T}) where {T} - convert(CLPtr{T}, x.data[]) + x.offset*Base.elsize(x) + return convert(CLPtr{T}, x.data[]) + x.offset * Base.elsize(x) end # interop with GPU arrays -function Base.unsafe_convert(::Type{CLDeviceArray{T,N,AS.Global}}, a::CLArray{T,N}) where {T,N} - CLDeviceArray{T,N,AS.Global}(size(a), reinterpret(LLVMPtr{T,AS.Global}, pointer(a)), - a.maxsize - a.offset*Base.elsize(a)) +function Base.unsafe_convert(::Type{CLDeviceArray{T, N, AS.Global}}, a::CLArray{T, N}) where {T, N} + return CLDeviceArray{T, N, AS.Global}( + size(a), reinterpret(LLVMPtr{T, AS.Global}, pointer(a)), + a.maxsize - a.offset * Base.elsize(a) + ) end ## memory copying synchronize(x::CLArray) = synchronize(x.data[]) -typetagdata(a::Array, i=1) = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a) + i - 1 -typetagdata(a::CLArray, i=1) = - convert(CLPtr{UInt8}, a.data[]) + a.maxsize + a.offset + i - 1 - -function Base.copyto!(dest::CLArray{T}, doffs::Integer, src::Array{T}, soffs::Integer, - n::Integer) where T - n==0 && return dest - @boundscheck checkbounds(dest, doffs) - @boundscheck checkbounds(dest, doffs+n-1) - @boundscheck checkbounds(src, soffs) - @boundscheck checkbounds(src, soffs+n-1) - unsafe_copyto!(context(dest), cl.device(), dest, doffs, src, soffs, n) - return dest +typetagdata(a::Array, i = 1) = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a) + i - 1 +typetagdata(a::CLArray, i = 1) = + convert(CLPtr{UInt8}, a.data[]) + a.maxsize + a.offset + i - 1 + +function Base.copyto!( + dest::CLArray{T}, doffs::Integer, src::Array{T}, soffs::Integer, + n::Integer + ) where {T} + n == 0 && return dest + @boundscheck checkbounds(dest, doffs) + @boundscheck checkbounds(dest, doffs + n - 1) + @boundscheck checkbounds(src, soffs) + @boundscheck checkbounds(src, soffs + n - 1) + unsafe_copyto!(context(dest), cl.device(), dest, doffs, src, soffs, n) + return dest end Base.copyto!(dest::DenseCLArray{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, length(src)) -function Base.copyto!(dest::Array{T}, doffs::Integer, src::DenseCLArray{T}, soffs::Integer, - n::Integer) where T - n==0 && return dest - @boundscheck checkbounds(dest, doffs) - @boundscheck checkbounds(dest, doffs+n-1) - @boundscheck checkbounds(src, soffs) - @boundscheck checkbounds(src, soffs+n-1) - unsafe_copyto!(context(src), cl.device(), dest, doffs, src, soffs, n) - return dest +function Base.copyto!( + dest::Array{T}, doffs::Integer, src::DenseCLArray{T}, soffs::Integer, + n::Integer + ) where {T} + n == 0 && return dest + @boundscheck checkbounds(dest, doffs) + @boundscheck checkbounds(dest, doffs + n - 1) + @boundscheck checkbounds(src, soffs) + @boundscheck checkbounds(src, soffs + n - 1) + unsafe_copyto!(context(src), cl.device(), dest, doffs, src, soffs, n) + return dest end Base.copyto!(dest::Array{T}, src::DenseCLArray{T}) where {T} = copyto!(dest, 1, src, 1, length(src)) -function Base.copyto!(dest::DenseCLArray{T}, doffs::Integer, src::DenseCLArray{T}, soffs::Integer, - n::Integer) where T - n==0 && return dest - @boundscheck checkbounds(dest, doffs) - @boundscheck checkbounds(dest, doffs+n-1) - @boundscheck checkbounds(src, soffs) - @boundscheck checkbounds(src, soffs+n-1) - @assert context(dest) == context(src) - unsafe_copyto!(context(dest), cl.device(), dest, doffs, src, soffs, n) - return dest +function Base.copyto!( + dest::DenseCLArray{T}, doffs::Integer, src::DenseCLArray{T}, soffs::Integer, + n::Integer + ) where {T} + n == 0 && return dest + @boundscheck checkbounds(dest, doffs) + @boundscheck checkbounds(dest, doffs + n - 1) + @boundscheck checkbounds(src, soffs) + @boundscheck checkbounds(src, soffs + n - 1) + @assert context(dest) == context(src) + unsafe_copyto!(context(dest), cl.device(), dest, doffs, src, soffs, n) + return dest end Base.copyto!(dest::DenseCLArray{T}, src::DenseCLArray{T}) where {T} = @@ -493,94 +513,108 @@ Base.copyto!(dest::DenseCLArray{T}, src::DenseCLArray{T}) where {T} = for (srcty, dstty) in [(:Array, :CLArray), (:CLArray, :Array), (:CLArray, :CLArray)] @eval begin - function Base.unsafe_copyto!(dst::$dstty{T}, dst_off::Int, - src::$srcty{T}, src_off::Int, - N::Int; blocking::Bool=true) where T + function Base.unsafe_copyto!( + dst::$dstty{T}, dst_off::Int, + src::$srcty{T}, src_off::Int, + N::Int; blocking::Bool = true + ) where {T} nbytes = N * sizeof(T) - cl.enqueue_usm_memcpy(pointer(dst, dst_off), pointer(src, src_off), nbytes; - blocking) + return cl.enqueue_usm_memcpy( + pointer(dst, dst_off), pointer(src, src_off), nbytes; + blocking + ) end Base.unsafe_copyto!(dst::$dstty, src::$srcty, N; kwargs...) = unsafe_copyto!(dst, 1, src, 1, N; kwargs...) end end -function Base.unsafe_copyto!(ctx::cl.Context, dev::cl.Device, - dest::DenseCLArray{T}, doffs, src::Array{T}, soffs, n) where T - GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) - if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") - end - return dest +function Base.unsafe_copyto!( + ctx::cl.Context, dev::cl.Device, + dest::DenseCLArray{T}, doffs, src::Array{T}, soffs, n + ) where {T} + GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) + if Base.isbitsunion(T) + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end + return dest end -function Base.unsafe_copyto!(ctx::cl.Context, dev::cl.Device, - dest::Array{T}, doffs, src::DenseCLArray{T}, soffs, n) where T - GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) - if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") - end +function Base.unsafe_copyto!( + ctx::cl.Context, dev::cl.Device, + dest::Array{T}, doffs, src::DenseCLArray{T}, soffs, n + ) where {T} + GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) + if Base.isbitsunion(T) + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end - # copies to the host are synchronizing - synchronize(src) + # copies to the host are synchronizing + synchronize(src) - return dest + return dest end -function Base.unsafe_copyto!(ctx::cl.Context, dev::cl.Device, - dest::DenseCLArray{T}, doffs, src::DenseCLArray{T}, soffs, n) where T - GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) - if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") - end - return dest +function Base.unsafe_copyto!( + ctx::cl.Context, dev::cl.Device, + dest::DenseCLArray{T}, doffs, src::DenseCLArray{T}, soffs, n + ) where {T} + GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n) + if Base.isbitsunion(T) + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end + return dest end # between Array and host-accessible CLArray -function Base.unsafe_copyto!(ctx::cl.cl.Context, dev::cl.Device, - dest::DenseCLArray{T,<:Any,<:Union{cl.SharedBuffer,cl.HostBuffer}}, doffs, src::Array{T}, soffs, n) where T - # maintain queue-ordered semantics - synchronize(dest) +function Base.unsafe_copyto!( + ctx::cl.cl.Context, dev::cl.Device, + dest::DenseCLArray{T, <:Any, <:Union{cl.SharedBuffer, cl.HostBuffer}}, doffs, src::Array{T}, soffs, n + ) where {T} + # maintain queue-ordered semantics + synchronize(dest) - if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") - end - GC.@preserve src dest begin - ptr = pointer(dest, doffs) - unsafe_copyto!(pointer(dest, doffs; type=cl.HostBuffer), pointer(src, soffs), n) if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end + GC.@preserve src dest begin + ptr = pointer(dest, doffs) + unsafe_copyto!(pointer(dest, doffs; type = cl.HostBuffer), pointer(src, soffs), n) + if Base.isbitsunion(T) + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end end - end - return dest + return dest end -function Base.unsafe_copyto!(ctx::cl.Context, dev::cl.Device, - dest::Array{T}, doffs, src::DenseCLArray{T,<:Any,<:Union{cl.SharedBuffer,cl.HostBuffer}}, soffs, n) where T - # maintain queue-ordered semantics - synchronize(src) +function Base.unsafe_copyto!( + ctx::cl.Context, dev::cl.Device, + dest::Array{T}, doffs, src::DenseCLArray{T, <:Any, <:Union{cl.SharedBuffer, cl.HostBuffer}}, soffs, n + ) where {T} + # maintain queue-ordered semantics + synchronize(src) - if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") - end - GC.@preserve src dest begin - ptr = pointer(dest, doffs) - unsafe_copyto!(pointer(dest, doffs), pointer(src, soffs; type=cl.HostBuffer), n) if Base.isbitsunion(T) - # copy selector bytes - error("CLArray does not yet support isbits-union arrays") + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end + GC.@preserve src dest begin + ptr = pointer(dest, doffs) + unsafe_copyto!(pointer(dest, doffs), pointer(src, soffs; type = cl.HostBuffer), n) + if Base.isbitsunion(T) + # copy selector bytes + error("CLArray does not yet support isbits-union arrays") + end end - end - return dest + return dest end #= TODO: LOOK INTO IF THIS OPTIMIZATION CAN BE SUPPORTED @@ -629,16 +663,16 @@ end # We don't convert isbits types in `adapt`, since they are already # considered GPU-compatible. -Adapt.adapt_storage(::Type{CLArray}, xs::AT) where {AT<:AbstractArray} = - isbitstype(AT) ? xs : convert(CLArray, xs) +Adapt.adapt_storage(::Type{CLArray}, xs::AT) where {AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(CLArray, xs) # if specific type parameters are specified, preserve those -Adapt.adapt_storage(::Type{<:CLArray{T}}, xs::AT) where {T, AT<:AbstractArray} = - isbitstype(AT) ? xs : convert(CLArray{T}, xs) -Adapt.adapt_storage(::Type{<:CLArray{T, N}}, xs::AT) where {T, N, AT<:AbstractArray} = - isbitstype(AT) ? xs : convert(CLArray{T,N}, xs) -Adapt.adapt_storage(::Type{<:CLArray{T, N, M}}, xs::AT) where {T, N, M, AT<:AbstractArray} = - isbitstype(AT) ? xs : convert(CLArray{T,N,M}, xs) +Adapt.adapt_storage(::Type{<:CLArray{T}}, xs::AT) where {T, AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(CLArray{T}, xs) +Adapt.adapt_storage(::Type{<:CLArray{T, N}}, xs::AT) where {T, N, AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(CLArray{T, N}, xs) +Adapt.adapt_storage(::Type{<:CLArray{T, N, M}}, xs::AT) where {T, N, M, AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(CLArray{T, N, M}, xs) #= TODO: LOOK INTO IF THIS IS OKAY OR NOT, LATER ## opinionated gpu array adaptor @@ -688,22 +722,22 @@ function Base.fill!(A::DenseCLArray{T}, x) where T <: MemsetCompatTypes end =# -function Base.fill!(A::DenseCLArray{T}, val) where T - B = [convert(T, val)] - unsafe_fill!(context(A), cl.device(), pointer(A), pointer(B), length(A)) - A +function Base.fill!(A::DenseCLArray{T}, val) where {T} + B = [convert(T, val)] + unsafe_fill!(context(A), cl.device(), pointer(A), pointer(B), length(A)) + return A end ## derived arrays -function GPUArrays.derive(::Type{T}, a::CLArray, dims::Dims{N}, offset::Int) where {T,N} - offset = if sizeof(T) == 0 - Base.elsize(a) == 0 || error("Cannot derive a singleton array from non-singleton inputs") - offset - else - (a.offset * Base.elsize(a)) ÷ sizeof(T) + offset - end - CLArray{T,N}(a.data, dims; a.maxsize, offset) +function GPUArrays.derive(::Type{T}, a::CLArray, dims::Dims{N}, offset::Int) where {T, N} + offset = if sizeof(T) == 0 + Base.elsize(a) == 0 || error("Cannot derive a singleton array from non-singleton inputs") + offset + else + (a.offset * Base.elsize(a)) ÷ sizeof(T) + offset + end + return CLArray{T, N}(a.data, dims; a.maxsize, offset) end ## views @@ -712,13 +746,13 @@ device(a::SubArray) = device(parent(a)) context(a::SubArray) = context(parent(a)) # pointer conversions -function Base.unsafe_convert(::Type{CLPtr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Base.RangeIndex}}}) where {T,N,P} +function Base.unsafe_convert(::Type{CLPtr{T}}, V::SubArray{T, N, P, <:Tuple{Vararg{Base.RangeIndex}}}) where {T, N, P} return Base.unsafe_convert(CLPtr{T}, parent(V)) + - Base._memory_offset(V.parent, map(first, V.indices)...) + Base._memory_offset(V.parent, map(first, V.indices)...) end -function Base.unsafe_convert(::Type{CLPtr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{Base.RangeIndex,Base.ReshapedUnitRange}}}}) where {T,N,P} - return Base.unsafe_convert(CLPtr{T}, parent(V)) + - (Base.first_index(V)-1)*sizeof(T) +function Base.unsafe_convert(::Type{CLPtr{T}}, V::SubArray{T, N, P, <:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange}}}}) where {T, N, P} + return Base.unsafe_convert(CLPtr{T}, parent(V)) + + (Base.first_index(V) - 1) * sizeof(T) end @@ -738,15 +772,14 @@ Base.unsafe_convert(::Type{CLPtr{T}}, A::PermutedDimsArray) where {T} = Wrap a Julia `Array` around the buffer that backs a `CLArray`. This is only possible if the GPU array is backed by a shared buffer, i.e. if it was created with `CLArray{T}(undef, ...)`. """ -function Base.unsafe_wrap(::Type{Array}, arr::CLArray{T,N,cl.SharedBuffer}) where {T,N} - # TODO: can we make this more convenient by increasing the buffer's refcount and using - # a finalizer on the Array? does that work when taking views etc of the Array? - ptr = reinterpret(Ptr{T}, pointer(arr)) - unsafe_wrap(Array, ptr, size(arr)) +function Base.unsafe_wrap(::Type{Array}, arr::CLArray{T, N, cl.SharedBuffer}) where {T, N} + # TODO: can we make this more convenient by increasing the buffer's refcount and using + # a finalizer on the Array? does that work when taking views etc of the Array? + ptr = reinterpret(Ptr{T}, pointer(arr)) + return unsafe_wrap(Array, ptr, size(arr)) end - ## resizing """ @@ -786,5 +819,5 @@ function Base.resize!(a::CLVector{T}, n::Integer) where {T} a.maxsize = maxsize a.offset = 0 - a + return a end diff --git a/src/broadcast.jl b/src/broadcast.jl index f3edea12..52ebf257 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -2,20 +2,22 @@ using Base.Broadcast: BroadcastStyle, Broadcasted -struct CLArrayStyle{N,B} <: AbstractGPUArrayStyle{N} end -CLArrayStyle{M,B}(::Val{N}) where {N,M,B} = CLArrayStyle{N,B}() +struct CLArrayStyle{N, B} <: AbstractGPUArrayStyle{N} end +CLArrayStyle{M, B}(::Val{N}) where {N, M, B} = CLArrayStyle{N, B}() # identify the broadcast style of a (wrapped) CLArray -BroadcastStyle(::Type{<:CLArray{T,N,B}}) where {T,N,B} = CLArrayStyle{N,B}() -BroadcastStyle(W::Type{<:WrappedCLArray{T,N}}) where {T,N} = +BroadcastStyle(::Type{<:CLArray{T, N, B}}) where {T, N, B} = CLArrayStyle{N, B}() +BroadcastStyle(W::Type{<:WrappedCLArray{T, N}}) where {T, N} = CLArrayStyle{N, buftype(Adapt.unwrap_type(W))}() # when we are dealing with different buffer styles, we cannot know # which one is better, so use shared memory -BroadcastStyle(::CLArrayStyle{N, B1}, - ::CLArrayStyle{N, B2}) where {N,B1,B2} = +BroadcastStyle( + ::CLArrayStyle{N, B1}, + ::CLArrayStyle{N, B2} +) where {N, B1, B2} = CLArrayStyle{N, cl.SharedBuffer}() # allocation of output arrays -Base.similar(bc::Broadcasted{CLArrayStyle{N,B}}, ::Type{T}, dims) where {T,N,B} = - similar(CLArray{T,length(dims),B}, dims) +Base.similar(bc::Broadcasted{CLArrayStyle{N, B}}, ::Type{T}, dims) where {T, N, B} = + similar(CLArray{T, length(dims), B}, dims) diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl index 423439c1..c35487d7 100644 --- a/src/compiler/compilation.jl +++ b/src/compiler/compilation.jl @@ -2,18 +2,20 @@ struct OpenCLCompilerParams <: AbstractCompilerParams end const OpenCLCompilerConfig = CompilerConfig{SPIRVCompilerTarget, OpenCLCompilerParams} -const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget,OpenCLCompilerParams} +const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget, OpenCLCompilerParams} -GPUCompiler.runtime_module(::CompilerJob{<:Any,OpenCLCompilerParams}) = OpenCL +GPUCompiler.runtime_module(::CompilerJob{<:Any, OpenCLCompilerParams}) = OpenCL GPUCompiler.method_table(::OpenCLCompilerJob) = method_table # filter out OpenCL built-ins # TODO: eagerly lower these using the translator API GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) = - invoke(GPUCompiler.isintrinsic, - Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)}, - job, fn) || + invoke( + GPUCompiler.isintrinsic, + Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)}, + job, fn +) || in(fn, opencl_builtins) @@ -42,14 +44,14 @@ function compiler_config(dev::cl.Device; kwargs...) end return config end -@noinline function _compiler_config(dev; kernel=true, name=nothing, always_inline=false, kwargs...) +@noinline function _compiler_config(dev; kernel = true, name = nothing, always_inline = false, kwargs...) supports_fp16 = "cl_khr_fp16" in dev.extensions supports_fp64 = "cl_khr_fp64" in dev.extensions # create GPUCompiler objects target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, kwargs...) params = OpenCLCompilerParams() - CompilerConfig(target, params; kernel, name, always_inline) + return CompilerConfig(target, params; kernel, name, always_inline) end # compile to executable machine code @@ -59,13 +61,13 @@ function compile(@nospecialize(job::CompilerJob)) GPUCompiler.compile(:obj, job) end - (obj, entry=LLVM.name(meta.entry)) + return (obj, entry = LLVM.name(meta.entry)) end # link into an executable kernel function link(@nospecialize(job::CompilerJob), compiled) prog = if "cl_khr_il_program" in cl.device().extensions - cl.Program(; il=compiled.obj) + cl.Program(; il = compiled.obj) else error("Your device does not support SPIR-V, which is currently required for native execution.") # XXX: kpet/spirv2clc#87, caused by KhronosGroup/SPIRV-LLVM-Translator#2029 @@ -78,5 +80,5 @@ function link(@nospecialize(job::CompilerJob), compiled) cl.Program(; source) end cl.build!(prog) - cl.Kernel(prog, compiled.entry) + return cl.Kernel(prog, compiled.entry) end diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl index 00f368b0..23c756aa 100644 --- a/src/compiler/execution.jl +++ b/src/compiler/execution.jl @@ -9,7 +9,7 @@ const LAUNCH_KWARGS = [:global_size, :local_size, :queue] macro opencl(ex...) call = ex[end] - kwargs = map(ex[1:end-1]) do kwarg + kwargs = map(ex[1:(end - 1)]) do kwarg if kwarg isa Symbol :($kwarg = $kwarg) elseif Meta.isexpr(kwarg, :(=)) @@ -31,14 +31,14 @@ macro opencl(ex...) macro_kwargs, compiler_kwargs, call_kwargs, other_kwargs = split_kwargs(kwargs, MACRO_KWARGS, COMPILER_KWARGS, LAUNCH_KWARGS) if !isempty(other_kwargs) - key,val = first(other_kwargs).args + key, val = first(other_kwargs).args throw(ArgumentError("Unsupported keyword argument '$key'")) end # handle keyword arguments that influence the macro's behavior launch = true for kwarg in macro_kwargs - key,val = kwarg.args + key, val = kwarg.args if key == :launch isa(val, Bool) || throw(ArgumentError("`launch` keyword argument to @opencl should be a constant value")) launch = val::Bool @@ -56,7 +56,8 @@ macro opencl(ex...) # convert the arguments, call the compiler and launch the kernel # while keeping the original arguments alive - push!(code.args, + push!( + code.args, quote $f_var = $f GC.@preserve $(vars...) $f_var begin @@ -69,13 +70,16 @@ macro opencl(ex...) end $kernel end - end) + end + ) - return esc(quote - let - $code + return esc( + quote + let + $code + end end - end) + ) end @@ -87,13 +91,13 @@ struct KernelAdaptor end Adapt.adapt_storage(to::KernelAdaptor, p::CLPtr{T}) where {T} = reinterpret(Ptr{T}, p) # convert OpenCL USM host arrays to device arrays -Adapt.adapt_storage(::KernelAdaptor, xs::CLArray{T,N}) where {T,N} = - Base.unsafe_convert(CLDeviceArray{T,N,AS.Global}, xs) +Adapt.adapt_storage(::KernelAdaptor, xs::CLArray{T, N}) where {T, N} = + Base.unsafe_convert(CLDeviceArray{T, N, AS.Global}, xs) # Base.RefValue isn't GPU compatible, so provide a compatible alternative # TODO: port improvements from CUDA.jl struct CLRefValue{T} <: Ref{T} - x::T + x::T end Base.getindex(r::CLRefValue) = r.x Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue) = CLRefValue(adapt(to, r[])) @@ -101,13 +105,15 @@ Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue) = CLRefValue(adapt(to # broadcast sometimes passes a ref(type), resulting in a GPU-incompatible DataType box. # avoid that by using a special kind of ref that knows about the boxed type. struct CLRefType{T} <: Ref{DataType} end -Base.getindex(r::CLRefType{T}) where T = T -Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType,Type}}) = +Base.getindex(r::CLRefType{T}) where {T} = T +Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType, Type}}) = CLRefType{r[]}() # case where type is the function being broadcasted -Adapt.adapt_structure(to::KernelAdaptor, - bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}) where {Style, T} = +Adapt.adapt_structure( + to::KernelAdaptor, + bc::Broadcast.Broadcasted{Style, <:Any, Type{T}} +) where {Style, T} = Broadcast.Broadcasted{Style}((x...) -> T(x...), adapt(to, bc.args), bc.axes) """ @@ -125,20 +131,20 @@ kernel_convert(arg) = adapt(KernelAdaptor(), arg) ## abstract kernel functionality -abstract type AbstractKernel{F,TT} end +abstract type AbstractKernel{F, TT} end -@inline @generated function call(kernel::AbstractKernel{F,TT}, args...; call_kwargs...) where {F,TT} +@inline @generated function call(kernel::AbstractKernel{F, TT}, args...; call_kwargs...) where {F, TT} sig = Tuple{F, TT.parameters...} # Base.signature_type with a function type - args = (:(kernel.f), (:( args[$i] ) for i in 1:length(args))...) + args = (:(kernel.f), (:(args[$i]) for i in 1:length(args))...) # filter out ghost arguments that shouldn't be passed predicate = dt -> isghosttype(dt) || Core.Compiler.isconstType(dt) to_pass = map(!predicate, sig.parameters) - call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]] - call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]] + call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]] + call_args = Union{Expr, Symbol}[x[1] for x in zip(args, to_pass) if x[2]] # replace non-isbits arguments (they should be unused, or compilation would have failed) - for (i,dt) in enumerate(call_t) + for (i, dt) in enumerate(call_t) if !isbitstype(dt) call_t[i] = Ptr{Any} call_args[i] = :C_NULL @@ -148,16 +154,15 @@ abstract type AbstractKernel{F,TT} end # finalize types call_tt = Base.to_tuple_type(call_t) - quote + return quote clcall(kernel.fun, $call_tt, $(call_args...); call_kwargs...) end end - ## host-side kernels -struct HostKernel{F,TT} <: AbstractKernel{F,TT} +struct HostKernel{F, TT} <: AbstractKernel{F, TT} f::F fun::cl.Kernel end @@ -167,7 +172,7 @@ end const clfunction_lock = ReentrantLock() -function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT} +function clfunction(f::F, tt::TT = Tuple{}; kwargs...) where {F, TT} ctx = cl.context() dev = cl.device() @@ -184,10 +189,10 @@ function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT} kernel = get(_kernel_instances, h, nothing) if kernel === nothing # create the kernel state object - kernel = HostKernel{F,tt}(f, fun) + kernel = HostKernel{F, tt}(f, fun) _kernel_instances[h] = kernel end - return kernel::HostKernel{F,tt} + return kernel::HostKernel{F, tt} end end @@ -195,5 +200,5 @@ end const _kernel_instances = Dict{UInt, Any}() function (kernel::HostKernel)(args...; kwargs...) - call(kernel, map(kernel_convert, args)...; kwargs...) + return call(kernel, map(kernel_convert, args)...; kwargs...) end diff --git a/src/compiler/reflection.jl b/src/compiler/reflection.jl index a8bb7143..8c17f609 100644 --- a/src/compiler/reflection.jl +++ b/src/compiler/reflection.jl @@ -24,13 +24,15 @@ for method in (:code_typed, :code_warntype, :code_llvm, :code_native) args = method == :code_typed ? (:job,) : (:io, :job) @eval begin - function $method(io::IO, @nospecialize(func), @nospecialize(types); - kernel::Bool=false, kwargs...) + function $method( + io::IO, @nospecialize(func), @nospecialize(types); + kernel::Bool = false, kwargs... + ) compiler_kwargs, kwargs = split_kwargs_runtime(kwargs, COMPILER_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types)) config = compiler_config(cl.device(); kernel, compiler_kwargs...) job = CompilerJob(source, config) - GPUCompiler.$method($(args...); kwargs...) + return GPUCompiler.$method($(args...); kwargs...) end $method(@nospecialize(func), @nospecialize(types); kwargs...) = $method(stdout, func, types; kwargs...) @@ -38,13 +40,12 @@ for method in (:code_typed, :code_warntype, :code_llvm, :code_native) end - # # @device_code_* functions # export @device_code_lowered, @device_code_typed, @device_code_warntype, @device_code_llvm, - @device_code_native, @device_code + @device_code_native, @device_code # forward to GPUCompiler @eval $(Symbol("@device_code_lowered")) = $(getfield(GPUCompiler, Symbol("@device_code_lowered"))) @@ -70,5 +71,5 @@ function return_type(@nospecialize(func), @nospecialize(tt)) job = CompilerJob(source, config) interp = GPUCompiler.get_interpreter(job) sig = Base.signature_type(func, tt) - Core.Compiler.return_type(interp, sig) + return Core.Compiler.return_type(interp, sig) end diff --git a/src/device/array.jl b/src/device/array.jl index f9726222..63757911 100644 --- a/src/device/array.jl +++ b/src/device/array.jl @@ -8,8 +8,8 @@ export CLDeviceArray, CLDeviceVector, CLDeviceMatrix, CLLocalArray # NOTE: we can't support the typical `tuple or series of integer` style construction, # because we're currently requiring a trailing pointer argument. -struct CLDeviceArray{T,N,A} <: DenseArray{T,N} - ptr::LLVMPtr{T,A} +struct CLDeviceArray{T, N, A} <: DenseArray{T, N} + ptr::LLVMPtr{T, A} maxsize::Int dims::Dims{N} @@ -17,27 +17,29 @@ struct CLDeviceArray{T,N,A} <: DenseArray{T,N} # inner constructors, fully parameterized, exact types (ie. Int not <:Integer) # TODO: deprecate; put `ptr` first like oneArray - CLDeviceArray{T,N,A}(dims::Dims{N}, ptr::LLVMPtr{T,A}, - maxsize::Int=prod(dims)*sizeof(T)) where {T,A,N} = + CLDeviceArray{T, N, A}( + dims::Dims{N}, ptr::LLVMPtr{T, A}, + maxsize::Int = prod(dims) * sizeof(T) + ) where {T, A, N} = new(ptr, maxsize, dims, prod(dims)) end -const CLDeviceVector = CLDeviceArray{T,1,A} where {T,A} -const CLDeviceMatrix = CLDeviceArray{T,2,A} where {T,A} +const CLDeviceVector = CLDeviceArray{T, 1, A} where {T, A} +const CLDeviceMatrix = CLDeviceArray{T, 2, A} where {T, A} # outer constructors, non-parameterized -CLDeviceArray(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = CLDeviceArray{T,N,A}(dims, p) -CLDeviceArray(len::Integer, p::LLVMPtr{T,A}) where {T,A} = CLDeviceVector{T,A}((len,), p) +CLDeviceArray(dims::NTuple{N, <:Integer}, p::LLVMPtr{T, A}) where {T, A, N} = CLDeviceArray{T, N, A}(dims, p) +CLDeviceArray(len::Integer, p::LLVMPtr{T, A}) where {T, A} = CLDeviceVector{T, A}((len,), p) # outer constructors, partially parameterized -CLDeviceArray{T}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = CLDeviceArray{T,N,A}(dims, p) -CLDeviceArray{T}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = CLDeviceVector{T,A}((len,), p) -CLDeviceArray{T,N}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = CLDeviceArray{T,N,A}(dims, p) -CLDeviceVector{T}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = CLDeviceVector{T,A}((len,), p) +CLDeviceArray{T}(dims::NTuple{N, <:Integer}, p::LLVMPtr{T, A}) where {T, A, N} = CLDeviceArray{T, N, A}(dims, p) +CLDeviceArray{T}(len::Integer, p::LLVMPtr{T, A}) where {T, A} = CLDeviceVector{T, A}((len,), p) +CLDeviceArray{T, N}(dims::NTuple{N, <:Integer}, p::LLVMPtr{T, A}) where {T, A, N} = CLDeviceArray{T, N, A}(dims, p) +CLDeviceVector{T}(len::Integer, p::LLVMPtr{T, A}) where {T, A} = CLDeviceVector{T, A}((len,), p) # outer constructors, fully parameterized -CLDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = CLDeviceArray{T,N,A}(Int.(dims), p) -CLDeviceVector{T,A}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = CLDeviceVector{T,A}((Int(len),), p) +CLDeviceArray{T, N, A}(dims::NTuple{N, <:Integer}, p::LLVMPtr{T, A}) where {T, A, N} = CLDeviceArray{T, N, A}(Int.(dims), p) +CLDeviceVector{T, A}(len::Integer, p::LLVMPtr{T, A}) where {T, A} = CLDeviceVector{T, A}((Int(len),), p) ## array interface @@ -50,19 +52,19 @@ Base.sizeof(x::CLDeviceArray) = Base.elsize(x) * length(x) # we store the array length too; computing prod(size) is expensive Base.length(g::CLDeviceArray) = g.len -Base.pointer(x::CLDeviceArray{T,<:Any,A}) where {T,A} = Base.unsafe_convert(LLVMPtr{T,A}, x) -@inline function Base.pointer(x::CLDeviceArray{T,<:Any,A}, i::Integer) where {T,A} - Base.unsafe_convert(LLVMPtr{T,A}, x) + Base._memory_offset(x, i) +Base.pointer(x::CLDeviceArray{T, <:Any, A}) where {T, A} = Base.unsafe_convert(LLVMPtr{T, A}, x) +@inline function Base.pointer(x::CLDeviceArray{T, <:Any, A}, i::Integer) where {T, A} + return Base.unsafe_convert(LLVMPtr{T, A}, x) + Base._memory_offset(x, i) end -typetagdata(a::CLDeviceArray{<:Any,<:Any,A}, i=1) where {A} = - reinterpret(LLVMPtr{UInt8,A}, a.ptr + a.maxsize) + i - one(i) +typetagdata(a::CLDeviceArray{<:Any, <:Any, A}, i = 1) where {A} = + reinterpret(LLVMPtr{UInt8, A}, a.ptr + a.maxsize) + i - one(i) ## conversions -Base.unsafe_convert(::Type{LLVMPtr{T,A}}, x::CLDeviceArray{T,<:Any,A}) where {T,A} = - x.ptr +Base.unsafe_convert(::Type{LLVMPtr{T, A}}, x::CLDeviceArray{T, <:Any, A}) where {T, A} = + x.ptr ## indexing intrinsics @@ -72,7 +74,7 @@ Base.unsafe_convert(::Type{LLVMPtr{T,A}}, x::CLDeviceArray{T,<:Any,A}) where {T, # (cfr. shared memory and its wider-than-datatype alignment) @generated function alignment(::CLDeviceArray{T}) where {T} - if Base.isbitsunion(T) + return if Base.isbitsunion(T) _, sz, al = Base.uniontype_layout(T) al else @@ -91,10 +93,10 @@ end @inline function arrayref_bits(A::CLDeviceArray{T}, index::Integer) where {T} align = alignment(A) - unsafe_load(pointer(A), index, Val(align)) + return unsafe_load(pointer(A), index, Val(align)) end -@inline @generated function arrayref_union(A::CLDeviceArray{T,<:Any,AS}, index::Integer) where {T,AS} +@inline @generated function arrayref_union(A::CLDeviceArray{T, <:Any, AS}, index::Integer) where {T, AS} typs = Base.uniontypes(T) # generate code that conditionally loads a value based on the selector value. @@ -102,8 +104,8 @@ end ex = :(Base.llvmcall("unreachable", $T, Tuple{})) for (sel, typ) in Iterators.reverse(enumerate(typs)) ex = quote - if selector == $(sel-1) - ptr = reinterpret(LLVMPtr{$typ,AS}, data_ptr) + if selector == $(sel - 1) + ptr = reinterpret(LLVMPtr{$typ, AS}, data_ptr) unsafe_load(ptr, 1, Val(align)) else $ex @@ -111,7 +113,7 @@ end end end - quote + return quote selector_ptr = typetagdata(A, index) selector = unsafe_load(selector_ptr) @@ -134,21 +136,21 @@ end @inline function arrayset_bits(A::CLDeviceArray{T}, x::T, index::Integer) where {T} align = alignment(A) - unsafe_store!(pointer(A), x, index, Val(align)) + return unsafe_store!(pointer(A), x, index, Val(align)) end -@inline @generated function arrayset_union(A::CLDeviceArray{T,<:Any,AS}, x::T, index::Integer) where {T,AS} +@inline @generated function arrayset_union(A::CLDeviceArray{T, <:Any, AS}, x::T, index::Integer) where {T, AS} typs = Base.uniontypes(T) sel = findfirst(isequal(x), typs) - quote + return quote selector_ptr = typetagdata(A, index) - unsafe_store!(selector_ptr, $(UInt8(sel-1))) + unsafe_store!(selector_ptr, $(UInt8(sel - 1))) align = alignment(A) data_ptr = pointer(A, index) - unsafe_store!(reinterpret(LLVMPtr{$x,AS}, data_ptr), x, 1, Val(align)) + unsafe_store!(reinterpret(LLVMPtr{$x, AS}, data_ptr), x, 1, Val(align)) return end end @@ -167,7 +169,7 @@ Base.IndexStyle(::Type{<:CLDeviceArray}) = Base.IndexLinear() Base.@propagate_inbounds Base.getindex(A::CLDeviceArray{T}, i1::Integer) where {T} = arrayref(A, i1) Base.@propagate_inbounds Base.setindex!(A::CLDeviceArray{T}, x, i1::Integer) where {T} = - arrayset(A, convert(T,x)::T, i1) + arrayset(A, convert(T, x)::T, i1) # preserve the specific integer type when indexing device arrays, # to avoid extending 32-bit hardware indices to 64-bit. @@ -175,11 +177,15 @@ Base.to_index(::CLDeviceArray, i::Integer) = i # Base doesn't like Integer indices, so we need our own ND get and setindex! routines. # See also: https://github.com/JuliaLang/julia/pull/42289 -Base.@propagate_inbounds Base.getindex(A::CLDeviceArray, - I::Union{Integer, CartesianIndex}...) = +Base.@propagate_inbounds Base.getindex( + A::CLDeviceArray, + I::Union{Integer, CartesianIndex}... +) = A[Base._to_linear_index(A, to_indices(A, I)...)] -Base.@propagate_inbounds Base.setindex!(A::CLDeviceArray, x, - I::Union{Integer, CartesianIndex}...) = +Base.@propagate_inbounds Base.setindex!( + A::CLDeviceArray, x, + I::Union{Integer, CartesianIndex}... +) = A[Base._to_linear_index(A, to_indices(A, I)...)] = x @@ -196,8 +202,8 @@ This API can only be used on devices with compute capability 3.5 or higher. !!! warning Experimental API. Subject to change without deprecation. """ -struct Const{T,N,AS} <: DenseArray{T,N} - a::CLDeviceArray{T,N,AS} +struct Const{T, N, AS} <: DenseArray{T, N} + a::CLDeviceArray{T, N, AS} end Base.Experimental.Const(A::CLDeviceArray) = Const(A) @@ -216,26 +222,26 @@ Base.show(io::IO, a::CLDeviceArray) = Base.show(io::IO, mime::MIME"text/plain", a::CLDeviceArray) = show(io, a) -@inline function Base.iterate(A::CLDeviceArray, i=1) - if (i % UInt) - 1 < length(A) +@inline function Base.iterate(A::CLDeviceArray, i = 1) + return if (i % UInt) - 1 < length(A) (@inbounds A[i], i + 1) else nothing end end -function Base.reinterpret(::Type{T}, a::CLDeviceArray{S,N,A}) where {T,S,N,A} - err = _reinterpret_exception(T, a) - err === nothing || throw(err) +function Base.reinterpret(::Type{T}, a::CLDeviceArray{S, N, A}) where {T, S, N, A} + err = _reinterpret_exception(T, a) + err === nothing || throw(err) - if sizeof(T) == sizeof(S) # fast case - return CLDeviceArray{T,N,A}(size(a), reinterpret(LLVMPtr{T,A}, a.ptr), a.maxsize) - end + if sizeof(T) == sizeof(S) # fast case + return CLDeviceArray{T, N, A}(size(a), reinterpret(LLVMPtr{T, A}, a.ptr), a.maxsize) + end - isize = size(a) - size1 = div(isize[1]*sizeof(S), sizeof(T)) - osize = tuple(size1, Base.tail(isize)...) - return CLDeviceArray{T,N,A}(osize, reinterpret(LLVMPtr{T,A}, a.ptr), a.maxsize) + isize = size(a) + size1 = div(isize[1] * sizeof(S), sizeof(T)) + osize = tuple(size1, Base.tail(isize)...) + return CLDeviceArray{T, N, A}(osize, reinterpret(LLVMPtr{T, A}, a.ptr), a.maxsize) end @@ -248,5 +254,5 @@ end # NOTE: this relies on const-prop to forward the literal length to the generator. # maybe we should include the size in the type, like StaticArrays does? ptr = emit_localmemory(T, Val(len)) - CLDeviceArray(dims, ptr) + return CLDeviceArray(dims, ptr) end diff --git a/src/device/quirks.jl b/src/device/quirks.jl index 9cdf15e9..c76bd789 100644 --- a/src/device/quirks.jl +++ b/src/device/quirks.jl @@ -1,5 +1,5 @@ macro print_and_throw(args...) - quote + return quote @println "ERROR: " $(args...) "." throw(nothing) end diff --git a/src/gpuarrays.jl b/src/gpuarrays.jl index 2f647a81..644f53cc 100644 --- a/src/gpuarrays.jl +++ b/src/gpuarrays.jl @@ -1,7 +1,7 @@ -const GLOBAL_RNGs = Dict{cl.Device,GPUArrays.RNG}() +const GLOBAL_RNGs = Dict{cl.Device, GPUArrays.RNG}() function GPUArrays.default_rng(::Type{<:CLArray}) dev = cl.device() - get!(GLOBAL_RNGs, dev) do + return get!(GLOBAL_RNGs, dev) do N = dev.max_work_group_size state = CLArray{NTuple{4, UInt32}}(undef, N) rng = GPUArrays.RNG(state) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index 2715d0b3..d5d8825c 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -18,10 +18,10 @@ d = 1 while d < items barrier() - index = 2 * d * (item-1) + 1 + index = 2 * d * (item - 1) + 1 @inbounds if index <= items other_val = if index + d <= items - shared[index+d] + shared[index + d] else neutral end @@ -91,9 +91,11 @@ end ## COV_EXCL_STOP -function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedCLArray{T}, - A::Union{AbstractArray,Broadcast.Broadcasted}; - init=nothing) where {F, OP, T} +function GPUArrays.mapreducedim!( + f::F, op::OP, R::WrappedCLArray{T}, + A::Union{AbstractArray, Broadcast.Broadcasted}; + init = nothing + ) where {F, OP, T} Base.check_reducedims(R, A) length(A) == 0 && return R # isempty(::Broadcasted) iterates @@ -123,7 +125,7 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedCLArray{T}, # we want as many as possible to improve algorithm efficiency and execution occupancy. wanted_items = length(Rreduce) function compute_items(max_items) - if wanted_items > max_items + return if wanted_items > max_items max_items else wanted_items @@ -138,8 +140,10 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedCLArray{T}, # group size is restricted by local memory max_lmem_elements = cl.device().local_mem_size ÷ sizeof(T) - max_items = min(cl.device().max_work_group_size, - compute_items(max_lmem_elements ÷ 2)) + max_items = min( + cl.device().max_work_group_size, + compute_items(max_lmem_elements ÷ 2) + ) # TODO: dynamic local memory to avoid two compilations # let the driver suggest a group size @@ -160,13 +164,14 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedCLArray{T}, # determine the launch configuration local_size = reduce_items - global_size = reduce_items*reduce_groups*other_groups + global_size = reduce_items * reduce_groups * other_groups # perform the actual reduction if reduce_groups == 1 # we can cover the dimensions to reduce using a single group @opencl local_size global_size partial_mapreduce_device( - f, op, init, Val(local_size), Rreduce, Rother, R′, A) + f, op, init, Val(local_size), Rreduce, Rother, R′, A + ) else # we need multiple steps to cover all values to reduce partial = similar(R, (size(R)..., reduce_groups)) @@ -175,9 +180,10 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedCLArray{T}, partial .= R end @opencl local_size global_size partial_mapreduce_device( - f, op, init, Val(local_size), Rreduce, Rother, partial, A) + f, op, init, Val(local_size), Rreduce, Rother, partial, A + ) - GPUArrays.mapreducedim!(identity, op, R′, partial; init=init) + GPUArrays.mapreducedim!(identity, op, R′, partial; init = init) end return R diff --git a/src/memory.jl b/src/memory.jl index e49522e1..708d595a 100644 --- a/src/memory.jl +++ b/src/memory.jl @@ -7,44 +7,44 @@ # XXX: immutable with atomic refs? mutable struct Managed{M} - const mem::M + const mem::M - # which stream is currently using the memory. - queue::cl.CmdQueue + # which stream is currently using the memory. + queue::cl.CmdQueue - # whether there are outstanding operations that haven't been synchronized - dirty::Bool + # whether there are outstanding operations that haven't been synchronized + dirty::Bool - # whether the memory has been captured in a way that would make the dirty bit unreliable - captured::Bool + # whether the memory has been captured in a way that would make the dirty bit unreliable + captured::Bool - function Managed(mem::cl.AbstractBuffer; queue=cl.queue(), dirty=true, captured=false) - # NOTE: memory starts as dirty, because stream-ordered allocations are only - # guaranteed to be physically allocated at a synchronization event. - new{typeof(mem)}(mem, queue, dirty, captured) - end + function Managed(mem::cl.AbstractBuffer; queue = cl.queue(), dirty = true, captured = false) + # NOTE: memory starts as dirty, because stream-ordered allocations are only + # guaranteed to be physically allocated at a synchronization event. + return new{typeof(mem)}(mem, queue, dirty, captured) + end end # wait for the current owner of memory to finish processing function synchronize(managed::Managed) - cl.finish(managed.queue) - managed.dirty = false + cl.finish(managed.queue) + return managed.dirty = false end function maybe_synchronize(managed::Managed) - if managed.dirty || managed.captured - synchronize(managed) - end + return if managed.dirty || managed.captured + synchronize(managed) + end end -function Base.convert(::Type{CLPtr{T}}, managed::Managed{M}) where {T,M} - # let null pointers pass through as-is - ptr = convert(CLPtr{T}, managed.mem) - if ptr == cl.CL_NULL - return ptr - end +function Base.convert(::Type{CLPtr{T}}, managed::Managed{M}) where {T, M} + # let null pointers pass through as-is + ptr = convert(CLPtr{T}, managed.mem) + if ptr == cl.CL_NULL + return ptr + end - #= TODO: FIGURE OUT ACTIVE STATE + #= TODO: FIGURE OUT ACTIVE STATE # state = cl.active_state() # accessing memory on another device: ensure the data is ready and accessible @@ -79,27 +79,30 @@ function Base.convert(::Type{CLPtr{T}}, managed::Managed{M}) where {T,M} end =# - managed.dirty = true - return ptr + managed.dirty = true + return ptr end -function Base.convert(::Type{Ptr{T}}, managed::Managed{M}) where {T,M} - # let null pointers pass through as-is - ptr = convert(Ptr{T}, managed.mem) - if ptr == C_NULL - return ptr - end +function Base.convert(::Type{Ptr{T}}, managed::Managed{M}) where {T, M} + # let null pointers pass through as-is + ptr = convert(Ptr{T}, managed.mem) + if ptr == C_NULL + return ptr + end - # accessing memory on the CPU: only allowed for host or unified allocations - if M == cl.DeviceBuffer - throw(ArgumentError( - """cannot take the CPU address of GPU memory.""")) + # accessing memory on the CPU: only allowed for host or unified allocations + if M == cl.DeviceBuffer + throw( + ArgumentError( + """cannot take the CPU address of GPU memory.""" + ) + ) - end + end - # make sure any work on the memory has finished. - maybe_synchronize(managed) - return ptr + # make sure any work on the memory has finished. + maybe_synchronize(managed) + return ptr end #= function Base.unsafe_copyto!(dst::Ptr, src::Ptr, nbytes::Integer; blocking::Bool=false, @@ -166,19 +169,23 @@ function unsafe_fill!(ctx::cl.Context, dev::cl.Device, ptr::Union{Ptr{T},cl.CLPt end =# -function Base.unsafe_copyto!(::cl.Context, ::cl.Device, dst::Union{CLPtr{T}, Ptr{T}}, src::Union{CLPtr{T}, Ptr{T}}, N::Integer; - queu::cl.CmdQueue=cl.queue()) where T - - cl.enqueue_usm_memcpy(dst, src, N*sizeof(T); queu=queu) - +function Base.unsafe_copyto!( + ::cl.Context, ::cl.Device, dst::Union{CLPtr{T}, Ptr{T}}, src::Union{CLPtr{T}, Ptr{T}}, N::Integer; + queu::cl.CmdQueue = cl.queue() + ) where {T} + + cl.enqueue_usm_memcpy(dst, src, N * sizeof(T); queu = queu) + cl.finish(queu) return dst end -function unsafe_fill!(ctx::cl.Context, dev::cl.Device, ptr::Union{Ptr{T},CLPtr{T}}, - pattern::Union{Ptr{T},CLPtr{T}}, N::Integer; queu::cl.CmdQueue=cl.queue()) where T - bytes = N*sizeof(T) - bytes==0 && return +function unsafe_fill!( + ctx::cl.Context, dev::cl.Device, ptr::Union{Ptr{T}, CLPtr{T}}, + pattern::Union{Ptr{T}, CLPtr{T}}, N::Integer; queu::cl.CmdQueue = cl.queue() + ) where {T} + bytes = N * sizeof(T) + bytes == 0 && return cl.enqueue_usm_memfill(ptr, pattern, sizeof(T), bytes; queu = queu) - cl.finish(queu) + return cl.finish(queu) end diff --git a/src/pool.jl b/src/pool.jl index 03286d7b..dd19ba8c 100644 --- a/src/pool.jl +++ b/src/pool.jl @@ -6,18 +6,18 @@ export OutOfGPUMemoryError An operation allocated too much GPU memory. """ struct OutOfGPUMemoryError <: Exception - sz::Int - dev::cl.Device + sz::Int + dev::cl.Device - function OutOfGPUMemoryError(sz::Integer=0, dev::cl.Device=cl.device()) - new(sz, dev) - end + function OutOfGPUMemoryError(sz::Integer = 0, dev::cl.Device = cl.device()) + return new(sz, dev) + end end function Base.showerror(io::IO, err::OutOfGPUMemoryError) print(io, "Out of GPU memory") if err.sz > 0 - print(io, " trying to allocate $(Base.format_bytes(err.sz))") + print(io, " trying to allocate $(Base.format_bytes(err.sz))") end print(" on device $((err.dev).name)") #= @@ -32,7 +32,7 @@ end function allocate(::Type{cl.DeviceBuffer}, ctx, dev, bytes::Int, alignment::Int) bytes == 0 && return cl.DeviceBuffer(cl.CL_NULL, bytes, ctx, dev) - buf = cl.device_alloc(ctx, dev, bytes, alignment=alignment) + buf = cl.device_alloc(ctx, dev, bytes, alignment = alignment) # make_resident(ctx, dev, buf) return buf end @@ -42,14 +42,14 @@ function allocate(::Type{cl.SharedBuffer}, ctx, dev, bytes::Int, alignment::Int) # TODO: support cross-device shared buffers (by setting `dev=nothing`) - buf = cl.shared_alloc(ctx, dev, bytes, alignment=alignment) + buf = cl.shared_alloc(ctx, dev, bytes, alignment = alignment) return buf end function allocate(::Type{cl.HostBuffer}, ctx, dev, bytes::Int, alignment::Int) bytes == 0 && return cl.HostBuffer(cl.CL_NULL, bytes, ctx) - cl.host_alloc(ctx, bytes, alignment=alignment) + return cl.host_alloc(ctx, bytes, alignment = alignment) end function release(buf::cl.AbstractBuffer) @@ -67,11 +67,10 @@ function release(buf::cl.AbstractBuffer) # evict(ctx, dev, buf) #end - free(buf, blocking=true) + free(buf, blocking = true) # TODO: queue-ordered free from non-finalizer tasks once we have # `zeMemFreeAsync(ptr, queue)` return end - diff --git a/src/random.jl b/src/random.jl index 3456be01..655470d3 100644 --- a/src/random.jl +++ b/src/random.jl @@ -19,5 +19,4 @@ rand(dim1::Integer, dims::Integer...) = Random.rand!(CLArray{Float32}(undef, dim randn(dim1::Integer, dims::Integer...; kwargs...) = Random.randn!(CLArray{Float32}(undef, dim1, dims...); kwargs...) # seeding -seed!(seed=Base.rand(UInt64)) = Random.seed!(gpuarrays_rng(), seed) - +seed!(seed = Base.rand(UInt64)) = Random.seed!(gpuarrays_rng(), seed) diff --git a/src/util.jl b/src/util.jl index e9f1acc2..35b09b80 100644 --- a/src/util.jl +++ b/src/util.jl @@ -10,12 +10,12 @@ function format(s::String; vars...) for (k, v) in vars s = replace(s, "%($k)" => v) end - s + return s end function build_kernel(program::String, kernel_name::String; vars...) src = format(program; vars...) - p = cl.Program(source=src) + p = cl.Program(source = src) cl.build!(p) return cl.Kernel(p, kernel_name) end @@ -32,7 +32,7 @@ function get_kernel(program_file::String, kernel_name::String; vars...) end end -function versioninfo(io::IO=stdout) +function versioninfo(io::IO = stdout) println(io, "OpenCL.jl version $(pkgversion(@__MODULE__))") println(io) @@ -43,7 +43,7 @@ function versioninfo(io::IO=stdout) end println(io) - env = filter(var->startswith(var, "JULIA_OPENCL"), keys(ENV)) + env = filter(var -> startswith(var, "JULIA_OPENCL"), keys(ENV)) if !isempty(env) println(io, "Environment:") for var in env @@ -81,6 +81,7 @@ function versioninfo(io::IO=stdout) println(io) end end + return end export @enum_without_prefix @@ -106,7 +107,7 @@ macro enum_without_prefix(enum, prefix) for instance in instances(enum) name = String(Symbol(instance)) @assert startswith(name, prefix) - push!(ex.args, :(const $(Symbol(name[length(prefix)+1:end])) = $(mod).$(Symbol(name)))) + push!(ex.args, :(const $(Symbol(name[(length(prefix) + 1):end])) = $(mod).$(Symbol(name)))) end return esc(ex) diff --git a/test/array.jl b/test/array.jl index 82a8ab7f..6b556e69 100644 --- a/test/array.jl +++ b/test/array.jl @@ -2,22 +2,22 @@ using LinearAlgebra @testset "CLArray" begin @testset "constructors" begin - @test CLArray{Float32,1}(undef, 1) isa CLArray{Float32,1} - @test CLArray{Float32,1}(undef, 1; access=:r) isa CLArray{Float32,1} + @test CLArray{Float32, 1}(undef, 1) isa CLArray{Float32, 1} + @test CLArray{Float32, 1}(undef, 1; access = :r) isa CLArray{Float32, 1} - @test CLArray{Float32}(undef, 1, 2) isa CLArray{Float32,2} - @test CLArray{Float32}(undef, 1, 2; access=:r) isa CLArray{Float32,2} + @test CLArray{Float32}(undef, 1, 2) isa CLArray{Float32, 2} + @test CLArray{Float32}(undef, 1, 2; access = :r) isa CLArray{Float32, 2} - @test CLArray{Float32}(undef, (1, 2)) isa CLArray{Float32,2} - @test CLArray{Float32}(undef, (1, 2); access=:r) isa CLArray{Float32,2} + @test CLArray{Float32}(undef, (1, 2)) isa CLArray{Float32, 2} + @test CLArray{Float32}(undef, (1, 2); access = :r) isa CLArray{Float32, 2} - hostarray = rand(Float32, 128*64) + hostarray = rand(Float32, 128 * 64) A = CLArray(hostarray) - @test A isa CLArray{Float32,1} + @test A isa CLArray{Float32, 1} @test Array(A) == hostarray - B = CLArray(hostarray; access=:r) - @test B isa CLArray{Float32,1} + B = CLArray(hostarray; access = :r) + @test B isa CLArray{Float32, 1} @test Array(B) == hostarray @test Array(copy(A)) == Array(A) @@ -33,10 +33,10 @@ using LinearAlgebra A = CLArray(rand(Float32, 128, 64)) @test size(A) == (128, 64) @test ndims(A) == 2 - @test length(A) == 128*64 + @test length(A) == 128 * 64 # reshape - B = reshape(A, 128*64) + B = reshape(A, 128 * 64) @test reshape(B, 128, 64) == A end end diff --git a/test/behaviour.jl b/test/behaviour.jl index 99c7faac..4cc5dc7b 100644 --- a/test/behaviour.jl +++ b/test/behaviour.jl @@ -12,13 +12,13 @@ hello_world_str = "hello world" - str_len = length(hello_world_str) + 1 + str_len = length(hello_world_str) + 1 out_arr = CLArray{Cchar}(undef, str_len) - prg = cl.Program(source=hello_world_kernel) |> cl.build! - kern = cl.Kernel(prg, "hello") + prg = cl.Program(source = hello_world_kernel) |> cl.build! + kern = cl.Kernel(prg, "hello") - clcall(kern, Tuple{Ptr{Cchar}}, out_arr; global_size=str_len) + clcall(kern, Tuple{Ptr{Cchar}}, out_arr; global_size = str_len) h = Array(out_arr) @test hello_world_str == GC.@preserve h unsafe_string(pointer(h)) @@ -26,7 +26,7 @@ end @testset "Low Level API Test" begin - test_source = " + test_source = " __kernel void sum(__global const float *a, __global const float *b, __global float *c, @@ -85,41 +85,55 @@ end end # create input array in device memory - Aid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(Cfloat) * len, h_a, err_code) + Aid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_a, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer A") end - Bid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(Cfloat) * len, h_b, err_code) + Bid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_b, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer B") end - Eid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(Cfloat) * len, h_e, err_code) + Eid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_e, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer E") end - Gid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(Cfloat) * len, h_g, err_code) + Gid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_g, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer G") end # create output arrays in device memory - Cid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, - sizeof(Cfloat) * len, C_NULL, err_code) + Cid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_READ_WRITE, + sizeof(Cfloat) * len, C_NULL, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer C") end - Did = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, - sizeof(Cfloat) * len, C_NULL, err_code) + Did = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_READ_WRITE, + sizeof(Cfloat) * len, C_NULL, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer D") end - Fid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY, - sizeof(Cfloat) * len, C_NULL, err_code) + Fid = cl.clCreateBuffer( + ctx_id, cl.CL_MEM_WRITE_ONLY, + sizeof(Cfloat) * len, C_NULL, err_code + ) if err_code[] != cl.CL_SUCCESS error("Error creating buffer F") end @@ -130,24 +144,32 @@ end cl.clSetKernelArg(k_id, 3, sizeof(cl.Cuint), cl.Cuint[len]) nglobal = Ref{Csize_t}(len) - cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) + cl.clEnqueueNDRangeKernel( + q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL + ) cl.clSetKernelArg(k_id, 0, sizeof(cl.cl_mem), [Eid]) cl.clSetKernelArg(k_id, 1, sizeof(cl.cl_mem), [Cid]) cl.clSetKernelArg(k_id, 2, sizeof(cl.cl_mem), [Did]) - cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) + cl.clEnqueueNDRangeKernel( + q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL + ) cl.clSetKernelArg(k_id, 0, sizeof(cl.cl_mem), [Gid]) cl.clSetKernelArg(k_id, 1, sizeof(cl.cl_mem), [Did]) cl.clSetKernelArg(k_id, 2, sizeof(cl.cl_mem), [Fid]) - cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) + cl.clEnqueueNDRangeKernel( + q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL + ) # read back the result from compute device... - cl.clEnqueueReadBuffer(q_id, Fid, cl.CL_TRUE, 0, - sizeof(Cfloat) * len, h_f, 0, C_NULL, C_NULL) + cl.clEnqueueReadBuffer( + q_id, Fid, cl.CL_TRUE, 0, + sizeof(Cfloat) * len, h_f, 0, C_NULL, C_NULL + ) # test results for i in 1:len @@ -177,43 +199,49 @@ end } " - Params = @eval(module $(gensym("KernelTest")) - struct Params - A::Float32 - B::Float32 - #TODO: fixed size arrays? - X1::Float32 - X2::Float32 - C::Int32 - Params(a, b, x, c) = begin - new(Float32(a), - Float32(b), - Float32(x[1]), - Float32(x[2]), - Int32(c)) - end + Params = @eval( + module $(gensym("KernelTest")) + struct Params + A::Float32 + B::Float32 + #TODO: fixed size arrays? + X1::Float32 + X2::Float32 + C::Int32 + Params(a, b, x, c) = begin + new( + Float32(a), + Float32(b), + Float32(x[1]), + Float32(x[2]), + Int32(c) + ) end - end).Params + end + end + ).Params - p = cl.Program(source=test_struct) |> cl.build! + p = cl.Program(source = test_struct) |> cl.build! part3 = cl.Kernel(p, "part3") - X = fill(1f0, 10) - Y = fill(1f0, 10) + X = fill(1.0f0, 10) + Y = fill(1.0f0, 10) P = [Params(0.5, 10.0, [0.0, 0.0], 3)] #TODO: constructor for single immutable types.., check if passed parameter isbits - P_arr = CLArray(P; access=:r) + P_arr = CLArray(P; access = :r) - X_arr = CLArray(X; access=:r) - Y_arr = CLArray(Y; access=:r) - R_arr = CLArray{Float32}(undef, 10; access=:w) + X_arr = CLArray(X; access = :r) + Y_arr = CLArray(Y; access = :r) + R_arr = CLArray{Float32}(undef, 10; access = :w) global_size = size(X) - clcall(part3, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Params}}, - X_arr, Y_arr, R_arr, P_arr; global_size) + clcall( + part3, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Params}}, + X_arr, Y_arr, R_arr, P_arr; global_size + ) r = Array(R_arr) @test all(x -> x == 13.5, r) @@ -238,14 +266,16 @@ end } " - MutableParams = @eval(module $(gensym("KernelTest")) - mutable struct MutableParams - A::Float32 - B::Float32 - end - end).MutableParams + MutableParams = @eval( + module $(gensym("KernelTest")) + mutable struct MutableParams + A::Float32 + B::Float32 + end + end + ).MutableParams - p = cl.Program(source=test_mutable_pointerfree) |> cl.build! + p = cl.Program(source = test_mutable_pointerfree) |> cl.build! part3 = cl.Kernel(p, "part3") diff --git a/test/buffer.jl b/test/buffer.jl index 96864ee4..697c4238 100644 --- a/test/buffer.jl +++ b/test/buffer.jl @@ -10,61 +10,61 @@ # memory copy let buf = cl.Buffer{Int}(1) src = [42] - cl.enqueue_write(buf, pointer(src), sizeof(src); blocking=true) + cl.enqueue_write(buf, pointer(src), sizeof(src); blocking = true) dst = [0] - cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking=true) + cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking = true) @test dst == [42] end # host accessible, mapped - let buf = cl.Buffer{Int}(1; host_accessible=true) + let buf = cl.Buffer{Int}(1; host_accessible = true) src = [42] - cl.enqueue_write(buf, pointer(src), sizeof(src); blocking=true) + cl.enqueue_write(buf, pointer(src), sizeof(src); blocking = true) ptr, evt = cl.enqueue_map(buf, sizeof(buf), :rw) wait(evt) - mapped = unsafe_wrap(Array, convert(Ptr{Int}, ptr), 1; own=false) + mapped = unsafe_wrap(Array, convert(Ptr{Int}, ptr), 1; own = false) @test mapped[] == 42 cl.enqueue_unmap(buf, ptr) |> wait end # re-use host buffer, without copy - let arr = [1,2,3] - buf = cl.Buffer(arr; copy=false) + let arr = [1, 2, 3] + buf = cl.Buffer(arr; copy = false) dst = similar(arr) - cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking=true) + cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking = true) @test dst == arr # we still need to map, despite copy=false ptr, evt = cl.enqueue_map(buf, sizeof(buf), :rw) wait(evt) - mapped_arr = unsafe_wrap(Array, convert(Ptr{Int}, ptr), 3; own=false) + mapped_arr = unsafe_wrap(Array, convert(Ptr{Int}, ptr), 3; own = false) mapped_arr .= 42 cl.enqueue_unmap(buf, ptr) |> wait # but our pre-allocated buffer should have been updated too - @test arr == [42,42,42] + @test arr == [42, 42, 42] # and we can read it back - cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking=true) + cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking = true) @test dst == arr end # re-use host buffer, but copy - let arr = [1,2,3] - buf = cl.Buffer(arr; copy=true) + let arr = [1, 2, 3] + buf = cl.Buffer(arr; copy = true) dst = similar(arr) - cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking=true) + cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking = true) @test dst == arr arr .= 42 # but our pre-allocated buffer should not have been updated - cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking=true) - @test dst == [1,2,3] + cl.enqueue_read(pointer(dst), buf, sizeof(dst); blocking = true) + @test dst == [1, 2, 3] end # fill @@ -72,8 +72,8 @@ cl.enqueue_fill(buf, 42, 3) arr = Vector{Int}(undef, 3) - cl.enqueue_read(pointer(arr), buf, sizeof(arr); blocking=true) - @test arr == [42,42,42] + cl.enqueue_read(pointer(arr), buf, sizeof(arr); blocking = true) + @test arr == [42, 42, 42] end end @@ -95,7 +95,7 @@ end cl.enqueue_svm_memcpy(ptr, pointer(src), sizeof(src)) dst = [0] - cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking=true) + cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking = true) @test dst == [42] end @@ -108,13 +108,13 @@ end evt = cl.enqueue_svm_map(ptr, sizeof(src), :rw) wait(evt) - mapped = unsafe_wrap(Array, ptr, 1; own=false) + mapped = unsafe_wrap(Array, ptr, 1; own = false) @test mapped[] == 42 mapped[] = 100 cl.enqueue_svm_unmap(ptr) |> cl.wait dst = [0] - cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking=true) + cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking = true) @test dst == [100] end @@ -125,7 +125,7 @@ end cl.enqueue_svm_fill(ptr, 42, 3) dst = Vector{Int}(undef, 3) - cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking=true) - @test dst == [42,42,42] + cl.enqueue_svm_memcpy(pointer(dst), ptr, sizeof(dst); blocking = true) + @test dst == [42, 42, 42] end end diff --git a/test/cmdqueue.jl b/test/cmdqueue.jl index f31ed9b4..f4740bc4 100644 --- a/test/cmdqueue.jl +++ b/test/cmdqueue.jl @@ -6,9 +6,11 @@ cl.CmdQueue(:out_of_order) cl.CmdQueue((:profile, :out_of_order)) catch err - @warn("Platform $(cl.device().platform.name) does not seem to " * - "suport out of order queues: \n$err",maxlog=1, - exception=(err, catch_backtrace())) + @warn( + "Platform $(cl.device().platform.name) does not seem to " * + "suport out of order queues: \n$err", maxlog = 1, + exception = (err, catch_backtrace()) + ) end @test_throws ArgumentError cl.CmdQueue(:unrecognized_flag) for flag in [:profile, :out_of_order] diff --git a/test/context.jl b/test/context.jl index aa4a4c07..5d620c40 100644 --- a/test/context.jl +++ b/test/context.jl @@ -7,7 +7,7 @@ @test ctx.reference_count == 1 ctx_id = pointer(ctx) - ctx2 = cl.Context(ctx_id; retain=true) + ctx2 = cl.Context(ctx_id; retain = true) @test ctx.reference_count == 2 finalize(ctx2) @test ctx.reference_count == 1 @@ -48,19 +48,23 @@ catch err @test typeof(err) == cl.CLError # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers - @test err.desc in (:CL_INVALID_PLATFORM, - :CL_DEVICE_NOT_FOUND) + @test err.desc in ( + :CL_INVALID_PLATFORM, + :CL_DEVICE_NOT_FOUND, + ) end properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] - for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), - (cl.CL_DEVICE_TYPE_GPU, :gpu)] + for (cl_dev_type, sym_dev_type) in [ + (cl.CL_DEVICE_TYPE_CPU, :cpu), + (cl.CL_DEVICE_TYPE_GPU, :gpu), + ] if !cl.has_device_type(cl.platform(), sym_dev_type) continue end - @test cl.Context(sym_dev_type, properties=properties) != nothing - @test cl.Context(cl_dev_type, properties=properties) != nothing - ctx = cl.Context(cl_dev_type, properties=properties) + @test cl.Context(sym_dev_type, properties = properties) != nothing + @test cl.Context(cl_dev_type, properties = properties) != nothing + ctx = cl.Context(cl_dev_type, properties = properties) @test !isempty(ctx.properties) test_properties = ctx.properties @@ -78,15 +82,17 @@ @test platform_in_properties end try - ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR, - properties=properties) + ctx2 = cl.Context( + cl.CL_DEVICE_TYPE_ACCELERATOR, + properties = properties + ) catch err @test typeof(err) == cl.CLError @test err.desc == :CL_DEVICE_NOT_FOUND end end - @testset "parsing" begin + @testset "parsing" begin properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] parsed_properties = cl._parse_properties(properties) diff --git a/test/device.jl b/test/device.jl index 94441053..d3c41431 100644 --- a/test/device.jl +++ b/test/device.jl @@ -1,8 +1,12 @@ @testset "Device" begin @testset "Type" begin - for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, - cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), - (:gpu, :cpu, :accelerator, :all)) + for (t, k) in zip( + ( + cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, + cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL, + ), + (:gpu, :cpu, :accelerator, :all) + ) #for (dk, dt) in zip(cl.devices(cl.platform(), k), cl.devices(cl.platform(), t)) # @fact dk == dt --> true @@ -20,43 +24,43 @@ if length(devices) > 1 d1 = devices[1] for d2 in devices[2:end] - @test pointer(d2) != pointer(d1) - @test hash(d2) != hash(d1) - @test isequal(d2, d1) == false - end - end + @test pointer(d2) != pointer(d1) + @test hash(d2) != hash(d1) + @test isequal(d2, d1) == false + end + end end @testset "Info" begin device_info_keys = Symbol[ - :driver_version, - :version, - :extensions, - :platform, - :name, - :device_type, - :has_image_support, - :vendor_id, - :max_compute_units, - :max_work_item_size, - :max_clock_frequency, - :address_bits, - :max_read_image_args, - :max_write_image_args, - :global_mem_size, - :max_mem_alloc_size, - :max_const_buffer_size, - :local_mem_size, - :has_local_mem, - :host_unified_memory, - :available, - :compiler_available, - :max_work_group_size, - :max_parameter_size, - :profiling_timer_resolution, - :max_image2d_shape, - :max_image3d_shape, - ] + :driver_version, + :version, + :extensions, + :platform, + :name, + :device_type, + :has_image_support, + :vendor_id, + :max_compute_units, + :max_work_item_size, + :max_clock_frequency, + :address_bits, + :max_read_image_args, + :max_write_image_args, + :global_mem_size, + :max_mem_alloc_size, + :max_const_buffer_size, + :local_mem_size, + :has_local_mem, + :host_unified_memory, + :available, + :compiler_available, + :max_work_group_size, + :max_parameter_size, + :profiling_timer_resolution, + :max_image2d_shape, + :max_image3d_shape, + ] @test isa(cl.platform(), cl.Platform) @test_throws ErrorException cl.platform().zjdlkf diff --git a/test/event.jl b/test/event.jl index e5c00b2a..d2446dfb 100644 --- a/test/event.jl +++ b/test/event.jl @@ -3,68 +3,68 @@ if contains(cl.platform().vendor, "Intel") || contains(cl.platform().vendor, "po # hangs on Intel @warn "Skipping event tests on $(cl.platform().name)" else -@testset "Event" begin - @testset "status" begin - evt = cl.UserEvent() - evt.status - @test evt.status == :submitted - cl.complete(evt) - @test evt.status == :complete - finalize(evt) - end + @testset "Event" begin + @testset "status" begin + evt = cl.UserEvent() + evt.status + @test evt.status == :submitted + cl.complete(evt) + @test evt.status == :complete + finalize(evt) + end - @testset "wait" begin - # create user event - usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) + @testset "wait" begin + # create user event + usr_evt = cl.UserEvent() + cl.enqueue_wait_for_events(usr_evt) - # create marker event - mkr_evt = cl.enqueue_marker() + # create marker event + mkr_evt = cl.enqueue_marker() - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) - @test mkr_evt.status == :complete + wait(mkr_evt) + @test mkr_evt.status == :complete - @test cl.cl_event_status(:running) == cl.CL_RUNNING - @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED - @test cl.cl_event_status(:queued) == cl.CL_QUEUED - @test cl.cl_event_status(:complete) == cl.CL_COMPLETE - end + @test cl.cl_event_status(:running) == cl.CL_RUNNING + @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED + @test cl.cl_event_status(:queued) == cl.CL_QUEUED + @test cl.cl_event_status(:complete) == cl.CL_COMPLETE + end - @testset "callback" begin - global callback_called = Ref(false) + @testset "callback" begin + global callback_called = Ref(false) - function test_callback(evt, status) - callback_called[] = true - end + function test_callback(evt, status) + callback_called[] = true + end - usr_evt = cl.UserEvent() + usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) + cl.enqueue_wait_for_events(usr_evt) - mkr_evt = cl.enqueue_marker() - cl.add_callback(mkr_evt, test_callback) + mkr_evt = cl.enqueue_marker() + cl.add_callback(mkr_evt, test_callback) - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) - @test !callback_called[] + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) + @test !callback_called[] - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) + wait(mkr_evt) - # Give callback some time to finish - yield() - sleep(0.5) + # Give callback some time to finish + yield() + sleep(0.5) - @test mkr_evt.status == :complete - @test callback_called[] + @test mkr_evt.status == :complete + @test callback_called[] + end end end -end diff --git a/test/execution.jl b/test/execution.jl index 0db325a6..821c4b09 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -1,118 +1,124 @@ @testset "execution" begin -@testset "@opencl" begin + @testset "@opencl" begin -dummy() = nothing + dummy() = nothing -@test_throws UndefVarError @opencl undefined() -@test_throws MethodError @opencl dummy(1) + @test_throws UndefVarError @opencl undefined() + @test_throws MethodError @opencl dummy(1) -@testset "launch configuration" begin - @opencl dummy() + @testset "launch configuration" begin + @opencl dummy() - global_size = 1 - @opencl global_size dummy() - @opencl global_size=1 dummy() - @opencl global_size=(1,1) dummy() - @opencl global_size=(1,1,1) dummy() + global_size = 1 + @opencl global_size dummy() + @opencl global_size = 1 dummy() + @opencl global_size = (1, 1) dummy() + @opencl global_size = (1, 1, 1) dummy() - local_size = 1 - @opencl global_size local_size dummy() - @opencl global_size=1 local_size=1 dummy() - @opencl global_size=(1,1) local_size=(1,1) dummy() - @opencl global_size=(1,1,1) local_size=(1,1,1) dummy() + local_size = 1 + @opencl global_size local_size dummy() + @opencl global_size = 1 local_size = 1 dummy() + @opencl global_size = (1, 1) local_size = (1, 1) dummy() + @opencl global_size = (1, 1, 1) local_size = (1, 1, 1) dummy() - @test_throws ArgumentError @opencl global_size=(1,) local_size=(1,1) dummy() - @test_throws InexactError @opencl global_size=(-2) dummy() - @test_throws InexactError @opencl local_size=(-2) dummy() -end - -@testset "launch=false" begin - # XXX: how are svm_pointers handled here? - k = @opencl launch=false dummy() - k() - k(; global_size=1) -end - -@testset "inference" begin - foo() = @opencl dummy() - @inferred foo() - - # with arguments, we call clconvert - kernel(a) = return - bar(a) = @opencl kernel(a) - @inferred bar(CLArray([1])) -end + @test_throws ArgumentError @opencl global_size = (1,) local_size = (1, 1) dummy() + @test_throws InexactError @opencl global_size = (-2) dummy() + @test_throws InexactError @opencl local_size = (-2) dummy() + end + @testset "launch=false" begin + # XXX: how are svm_pointers handled here? + k = @opencl launch = false dummy() + k() + k(; global_size = 1) + end -@testset "reflection" begin - OpenCL.code_lowered(dummy, Tuple{}) - OpenCL.code_typed(dummy, Tuple{}) - OpenCL.code_warntype(devnull, dummy, Tuple{}) - OpenCL.code_llvm(devnull, dummy, Tuple{}) - OpenCL.code_native(devnull, dummy, Tuple{}) + @testset "inference" begin + foo() = @opencl dummy() + @inferred foo() - @device_code_lowered @opencl dummy() - @device_code_typed @opencl dummy() - @device_code_warntype io=devnull @opencl dummy() - @device_code_llvm io=devnull @opencl dummy() - @device_code_native io=devnull @opencl dummy() + # with arguments, we call clconvert + kernel(a) = return + bar(a) = @opencl kernel(a) + @inferred bar(CLArray([1])) + end - mktempdir() do dir - @device_code dir=dir @opencl dummy() - end - @test_throws ErrorException @device_code_lowered nothing + @testset "reflection" begin + OpenCL.code_lowered(dummy, Tuple{}) + OpenCL.code_typed(dummy, Tuple{}) + OpenCL.code_warntype(devnull, dummy, Tuple{}) + OpenCL.code_llvm(devnull, dummy, Tuple{}) + OpenCL.code_native(devnull, dummy, Tuple{}) - # make sure kernel name aliases are preserved in the generated code - @test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @opencl dummy()))) - @test occursin("dummy", sprint(io->(@device_code_llvm io=io @opencl dummy()))) - @test occursin("dummy", sprint(io->(@device_code_native io=io @opencl dummy()))) + @device_code_lowered @opencl dummy() + @device_code_typed @opencl dummy() + @device_code_warntype io = devnull @opencl dummy() + @device_code_llvm io = devnull @opencl dummy() + @device_code_native io = devnull @opencl dummy() - # make sure invalid kernels can be partially reflected upon - let - invalid_kernel() = throw() - @test_throws OpenCL.InvalidIRError @opencl invalid_kernel() - @test_throws OpenCL.InvalidIRError IOCapture.capture() do - @device_code_warntype @opencl invalid_kernel() - end - c = IOCapture.capture() do - try - @device_code_warntype @opencl invalid_kernel() - catch + mktempdir() do dir + @device_code dir = dir @opencl dummy() end - end - @test occursin("Body::Union{}", c.output) - end - # set name of kernel - @test occursin("mykernel", sprint(io->(@device_code_llvm io=io begin - @opencl name="mykernel" dummy() - end))) + @test_throws ErrorException @device_code_lowered nothing + + # make sure kernel name aliases are preserved in the generated code + @test occursin("dummy", sprint(io -> (@device_code_llvm io = io optimize = false @opencl dummy()))) + @test occursin("dummy", sprint(io -> (@device_code_llvm io = io @opencl dummy()))) + @test occursin("dummy", sprint(io -> (@device_code_native io = io @opencl dummy()))) + + # make sure invalid kernels can be partially reflected upon + let + invalid_kernel() = throw() + @test_throws OpenCL.InvalidIRError @opencl invalid_kernel() + @test_throws OpenCL.InvalidIRError IOCapture.capture() do + @device_code_warntype @opencl invalid_kernel() + end + c = IOCapture.capture() do + try + @device_code_warntype @opencl invalid_kernel() + catch + end + end + @test occursin("Body::Union{}", c.output) + end - @test OpenCL.return_type(identity, Tuple{Int}) === Int - @test OpenCL.return_type(sin, Tuple{Float32}) === Float32 - @test OpenCL.return_type(getindex, Tuple{CLDeviceArray{Float32,1,AS.Global},Int32}) === Float32 - @test OpenCL.return_type(getindex, Tuple{Base.RefValue{Integer}}) === Integer -end + # set name of kernel + @test occursin( + "mykernel", sprint( + io -> ( + @device_code_llvm io = io begin + @opencl name = "mykernel" dummy() + end + ) + ) + ) + + @test OpenCL.return_type(identity, Tuple{Int}) === Int + @test OpenCL.return_type(sin, Tuple{Float32}) === Float32 + @test OpenCL.return_type(getindex, Tuple{CLDeviceArray{Float32, 1, AS.Global}, Int32}) === Float32 + @test OpenCL.return_type(getindex, Tuple{Base.RefValue{Integer}}) === Integer + end -end + end -############################################################################### + ############################################################################### -@testset "argument passing" begin + @testset "argument passing" begin -function memset(a, val) - gid = get_global_id(1) - @inbounds a[gid] = val - return -end + function memset(a, val) + gid = get_global_id(1) + @inbounds a[gid] = val + return + end -a = CLArray{Int}(undef, 10) -@opencl global_size=length(a) memset(a, 42) -@test all(Array(a) .== 42) + a = CLArray{Int}(undef, 10) + @opencl global_size = length(a) memset(a, 42) + @test all(Array(a) .== 42) -end + end end diff --git a/test/kernel.jl b/test/kernel.jl index 442aa604..a1a2898e 100644 --- a/test/kernel.jl +++ b/test/kernel.jl @@ -15,14 +15,14 @@ #TODO: tests for invalid kernel build error && logs... @testset "constructor" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) @test_throws ArgumentError cl.Kernel(prg, "sum") cl.build!(prg) @test cl.Kernel(prg, "sum") != nothing end @testset "info" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) cl.build!(prg) k = cl.Kernel(prg, "sum") @test k.function_name == "sum" @@ -33,7 +33,7 @@ end @testset "mem/workgroup size" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) cl.build!(prg) k = cl.Kernel(prg, "sum") wginfo = cl.work_group_info(k, cl.device()) @@ -43,17 +43,17 @@ end @testset "set_arg!/set_args!" begin - prg = cl.Program(source=test_source) |> cl.build! + prg = cl.Program(source = test_source) |> cl.build! k = cl.Kernel(prg, "sum") - count = 1024 + count = 1024 nbytes = count * sizeof(Float32) h_ones = ones(Float32, count) - A = CLArray(h_ones; access=:r) - B = CLArray(h_ones; access=:r) - C = CLArray{Float32}(undef, count; access=:w) + A = CLArray(h_ones; access = :r) + B = CLArray(h_ones; access = :r) + C = CLArray{Float32}(undef, count; access = :w) # we use julia's index by one convention @test cl.set_arg!(k, 1, buffer(A)) != nothing @@ -71,7 +71,7 @@ k2 = cl.Kernel(prg, "sum") cl.set_args!(k2, buffer(A), buffer(B), buffer(C), UInt32(count)) - h_twos = fill(2f0, count) + h_twos = fill(2.0f0, count) copyto!(A, h_twos) copyto!(B, h_twos) @@ -90,17 +90,21 @@ *i += 1; };" - h_buff = Float32[1,] + h_buff = Float32[1] d_arr = CLArray(h_buff) - p = cl.Program(source=simple_kernel) |> cl.build! + p = cl.Program(source = simple_kernel) |> cl.build! k = cl.Kernel(p, "test") # dimensions must be the same size - @test_throws ArgumentError clcall(k, Tuple{Ptr{Float32}}, d_arr; - global_size=(1,), local_size=(1,1)) - @test_throws ArgumentError clcall(k, Tuple{Ptr{Float32}}, d_arr; - global_size=(1,1), local_size=(1,)) + @test_throws ArgumentError clcall( + k, Tuple{Ptr{Float32}}, d_arr; + global_size = (1,), local_size = (1, 1) + ) + @test_throws ArgumentError clcall( + k, Tuple{Ptr{Float32}}, d_arr; + global_size = (1, 1), local_size = (1,) + ) # dimensions are bounded max_work_dim = cl.device().max_work_item_dims @@ -108,12 +112,12 @@ # calls are asynchronous, but cl.read blocks clcall(k, Tuple{Ptr{Float32}}, d_arr) - @test Array(d_arr) == [2f0] + @test Array(d_arr) == [2.0f0] # enqueue task is an alias for calling # a kernel with a global/local size of 1 evt = cl.enqueue_task(k) - @test Array(d_arr) == [3f0] + @test Array(d_arr) == [3.0f0] end @testset "packed structures" begin @@ -133,7 +137,7 @@ out = CLArray{Float32}(undef, 2) bstruct = (1, Int32(4)) clcall(structkernel, Tuple{Ptr{Float32}, Tuple{Int64, Cint}}, out, bstruct) - @test Array(out) == [1f0, 4f0] + @test Array(out) == [1.0f0, 4.0f0] end @testset "vector arguments" begin @@ -153,10 +157,12 @@ out = CLArray{Float32}(undef, 6) # NOTE: the user is responsible for padding the vector to 4 elements # (only on some platforms) - vec3_a = (1f0, 2f0, 3f0, 0f0) - vec3_b = (4f0, 5f0, 6f0, 0f0) - clcall(vec3kernel, Tuple{Ptr{Float32}, NTuple{4,Float32}, NTuple{4,Float32}}, - out, vec3_a, vec3_b) - @test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0] + vec3_a = (1.0f0, 2.0f0, 3.0f0, 0.0f0) + vec3_b = (4.0f0, 5.0f0, 6.0f0, 0.0f0) + clcall( + vec3kernel, Tuple{Ptr{Float32}, NTuple{4, Float32}, NTuple{4, Float32}}, + out, vec3_a, vec3_b + ) + @test Array(out) == [1.0f0, 2.0f0, 3.0f0, 4.0f0, 5.0f0, 6.0f0] end end diff --git a/test/kernelabstractions.jl b/test/kernelabstractions.jl index debf5dec..44edd347 100644 --- a/test/kernelabstractions.jl +++ b/test/kernelabstractions.jl @@ -1,5 +1,7 @@ -skip_tests=Set([ - "sparse", - "Convert", # Need to opt out of i128 -]) +skip_tests = Set( + [ + "sparse", + "Convert", # Need to opt out of i128 + ] +) KATestSuite.testsuite(OpenCLBackend, "OpenCL", OpenCL, CLArray, CLDeviceArray; skip_tests) diff --git a/test/memory.jl b/test/memory.jl index 7ca07f81..3de25010 100644 --- a/test/memory.jl +++ b/test/memory.jl @@ -21,7 +21,7 @@ (:mem_flags, (:rw, :copy)), (:size, sizeof(buf)), (:reference_count, 1), - (:map_count, 0) + (:map_count, 0), ] for expectation in expectations diff --git a/test/platform.jl b/test/platform.jl index 5e1c95f4..ecbe819e 100644 --- a/test/platform.jl +++ b/test/platform.jl @@ -8,8 +8,8 @@ end @testset "Equality" begin - platform = cl.platforms()[1] - platform_copy = cl.platforms()[1] + platform = cl.platforms()[1] + platform_copy = cl.platforms()[1] @test pointer(platform) == pointer(platform_copy) @test hash(platform) == hash(platform_copy) diff --git a/test/program.jl b/test/program.jl index 01c62839..5b3587ec 100644 --- a/test/program.jl +++ b/test/program.jl @@ -1,7 +1,7 @@ @testset "Program" begin let @test_throws ArgumentError cl.Program() - @test_throws ArgumentError cl.Program(source="", il="") + @test_throws ArgumentError cl.Program(source = "", il = "") end test_source = " @@ -15,15 +15,15 @@ " function create_test_program() - cl.Program(source=test_source) + cl.Program(source = test_source) end @testset "source constructor" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) @test prg != nothing end @testset "info" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) @test prg.context == cl.context() @@ -39,7 +39,7 @@ end @testset "build" begin - prg = cl.Program(source=test_source) + prg = cl.Program(source = test_source) @test cl.build!(prg) != nothing @test prg.build_status[cl.device()] == cl.CL_BUILD_SUCCESS @@ -47,22 +47,22 @@ end @testset "source code" begin - prg = cl.Program(source=test_source) - @test prg.source == test_source + prg = cl.Program(source = test_source) + @test prg.source == test_source end if contains(cl.platform().vendor, "pocl") @warn "Skipping binary program tests on $(cl.platform().name)" else @testset "binaries" begin - prg = cl.Program(source=test_source) |> cl.build! + prg = cl.Program(source = test_source) |> cl.build! @test cl.device() in collect(keys(prg.binaries)) binaries = prg.binaries @test cl.device() in collect(keys(binaries)) @test binaries[cl.device()] != nothing @test length(binaries[cl.device()]) > 0 - prg2 = cl.Program(binaries=binaries) + prg2 = cl.Program(binaries = binaries) @test prg2.binaries == binaries @test prg2.source === nothing end diff --git a/test/runtests.jl b/test/runtests.jl index 9c1ce000..d7509792 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,14 +5,14 @@ using Printf: @sprintf using Base.Filesystem: path_separator # parse some command-line arguments -function extract_flag!(args, flag, default=nothing) +function extract_flag!(args, flag, default = nothing) for f in args if startswith(f, flag) # Check if it's just `--flag` or if it's `--flag=foo` if f != flag val = split(f, '=')[2] if default !== nothing && !(typeof(default) <: AbstractString) - val = parse(typeof(default), val) + val = parse(typeof(default), val) end else val = default @@ -27,7 +27,8 @@ function extract_flag!(args, flag, default=nothing) end do_help, _ = extract_flag!(ARGS, "--help") if do_help - println(""" + println( + """ Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...] --help Show this text. @@ -36,14 +37,15 @@ if do_help --jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS). --platform=NAME Run tests on the platform named `NAME` (default: all platforms). - Remaining arguments filter the tests that will be executed.""") + Remaining arguments filter the tests that will be executed.""" + ) exit(0) end _, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS) do_quickfail, _ = extract_flag!(ARGS, "--quickfail") include("setup.jl") # make sure everything is precompiled -@info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) +@info "System information:\n" * sprint(io -> OpenCL.versioninfo(io)) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." @@ -52,40 +54,40 @@ const tests = [] const test_runners = Dict() ## files in the test folder for (rootpath, dirs, files) in walkdir(@__DIR__) - # find Julia files - filter!(files) do file - endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl" - end - isempty(files) && continue - - # strip extension - files = map(files) do file - file[1:end-3] - end + # find Julia files + filter!(files) do file + endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl" + end + isempty(files) && continue - # prepend subdir - subdir = relpath(rootpath, @__DIR__) - if subdir != "." + # strip extension files = map(files) do file - joinpath(subdir, file) + file[1:(end - 3)] end - end - # unify path separators - files = map(files) do file - replace(file, path_separator => '/') - end + # prepend subdir + subdir = relpath(rootpath, @__DIR__) + if subdir != "." + files = map(files) do file + joinpath(subdir, file) + end + end - append!(tests, files) - for file in files - test_runners[file] = ()->include("$(@__DIR__)/$file.jl") - end + # unify path separators + files = map(files) do file + replace(file, path_separator => '/') + end + + append!(tests, files) + for file in files + test_runners[file] = () -> include("$(@__DIR__)/$file.jl") + end end -sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true) +sort!(tests; by = (file) -> stat("$(@__DIR__)/$file.jl").size, rev = true) ## GPUArrays testsuite for name in keys(GPUArraysTestSuite.tests) push!(tests, "gpuarrays/$name") - test_runners["gpuarrays/$name"] = ()->GPUArraysTestSuite.tests[name](CLArray) + test_runners["gpuarrays/$name"] = () -> GPUArraysTestSuite.tests[name](CLArray) end ## finalize unique!(tests) @@ -109,9 +111,9 @@ if !isempty(optlike_args) end ## the remaining args filter tests if !isempty(ARGS) - filter!(tests) do test - any(arg->startswith(test, arg), ARGS) - end + filter!(tests) do test + any(arg -> startswith(test, arg), ARGS) + end end # add workers @@ -125,8 +127,8 @@ push!(test_exeflags.exec, "--depwarn=yes") push!(test_exeflags.exec, "--project=$(Base.active_project())") const test_exename = popfirst!(test_exeflags.exec) function addworker(X; kwargs...) - withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do - procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...) + return withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do + procs = addprocs(X; exename = test_exename, exeflags = test_exeflags, kwargs...) @everywhere procs include($(joinpath(@__DIR__, "setup.jl"))) procs end @@ -136,19 +138,25 @@ addworker(min(jobs, length(tests))) # pretty print information about gc and mem usage testgroupheader = "Test" workerheader = "(Worker)" -name_align = maximum([textwidth(testgroupheader) + textwidth(" ") + - textwidth(workerheader); map(x -> textwidth(x) + - 3 + ndigits(nworkers()), tests)]) -elapsed_align = textwidth("Time (s)") -gc_align = textwidth("GC (s)") +name_align = maximum( + [ + textwidth(testgroupheader) + textwidth(" ") + + textwidth(workerheader); map( + x -> textwidth(x) + + 3 + ndigits(nworkers()), tests + ) + ] +) +elapsed_align = textwidth("Time (s)") +gc_align = textwidth("GC (s)") percent_align = textwidth("GC %") -alloc_align = textwidth("Alloc (MB)") -rss_align = textwidth("RSS (MB)") +alloc_align = textwidth("Alloc (MB)") +rss_align = textwidth("RSS (MB)") printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ") -printstyled(" | ---------------- CPU ---------------- |\n", color=:white) -printstyled(testgroupheader, color=:white) -printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white) -printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white) +printstyled(" | ---------------- CPU ---------------- |\n", color = :white) +printstyled(testgroupheader, color = :white) +printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color = :white) +printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color = :white) print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock() if stderr isa Base.LibuvStream stderr.lock = print_lock @@ -156,35 +164,37 @@ end function print_testworker_stats(test, wrkr, resp) @nospecialize resp lock(print_lock) - try - printstyled(test, color=:white) - printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white) - time_str = @sprintf("%7.2f",resp[2]) - printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white) + return try + printstyled(test, color = :white) + printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color = :white) + time_str = @sprintf("%7.2f", resp[2]) + printstyled(lpad(time_str, elapsed_align, " "), " | ", color = :white) cpu_gc_str = @sprintf("%5.2f", resp[4]) - printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white) + printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color = :white) # since there may be quite a few digits in the percentage, # the left-padding here is less to make sure everything fits cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2]) - printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white) + printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color = :white) cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20) - printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white) + printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color = :white) cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20) - printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white) + printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color = :white) finally unlock(print_lock) end end -global print_testworker_started = (name, wrkr)->begin +global print_testworker_started = (name, wrkr) -> begin end function print_testworker_errored(name, wrkr) lock(print_lock) - try - printstyled(name, color=:red) - printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", - " "^elapsed_align, " failed at $(now())\n", color=:red) + return try + printstyled(name, color = :red) + printstyled( + lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", + " "^elapsed_align, " failed at $(now())\n", color = :red + ) finally unlock(print_lock) end @@ -212,9 +222,9 @@ try break elseif c == '?' println("Currently running: ") - tests = sort(collect(running_tests), by=x->x[2]) + tests = sort(collect(running_tests), by = x -> x[2]) foreach(tests) do (test, date) - println(test, " (running for ", round(now()-date, Minute), ")") + println(test, " (running for ", round(now() - date, Minute), ")") end end end @@ -227,7 +237,7 @@ try end @sync begin function recycle_worker(p) - rmprocs(p, waitfor=30) + rmprocs(p, waitfor = 30) return nothing end @@ -248,9 +258,11 @@ try # run the test running_tests[test] = now() try - resp = remotecall_fetch(runtests, wrkr, - test_runners[test], test, - platform) + resp = remotecall_fetch( + runtests, wrkr, + test_runners[test], test, + platform + ) catch e isa(e, InterruptException) && return resp = Any[e] @@ -281,15 +293,17 @@ catch e isa(e, InterruptException) || rethrow() # If the test suite was merely interrupted, still print the # summary, which can be useful to diagnose what's going on - foreach(task -> begin + foreach( + task -> begin istaskstarted(task) || return istaskdone(task) && return try - schedule(task, InterruptException(); error=true) + schedule(task, InterruptException(); error = true) catch ex - @error "InterruptException" exception=ex,catch_backtrace() + @error "InterruptException" exception = ex, catch_backtrace() end - end, all_tasks) + end, all_tasks + ) for t in all_tasks # NOTE: we can't just wait, but need to discard the exception, # because the throwto for --quickfail also kills the worker. @@ -301,11 +315,11 @@ catch e end finally if @isdefined stdin_monitor - schedule(stdin_monitor, InterruptException(); error=true) + schedule(stdin_monitor, InterruptException(); error = true) end end t1 = now() -elapsed = canonicalize(Dates.CompoundPeriod(t1-t0)) +elapsed = canonicalize(Dates.CompoundPeriod(t1 - t0)) println("Testing finished in $elapsed") # construct a testset to render the test results @@ -318,7 +332,7 @@ for (testname, (resp,)) in results Test.push_testset(resp) Test.record(o_ts, resp) Test.pop_testset() - elseif isa(resp, Tuple{Int,Int}) + elseif isa(resp, Tuple{Int, Int}) fake = Test.DefaultTestSet(testname) for i in 1:resp[1] Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) @@ -364,8 +378,12 @@ end for test in all_tests (test in completed_tests) && continue fake = Test.DefaultTestSet(test) - Test.record(fake, Test.Error(:test_interrupted, test, nothing, - [("skipped", [])], LineNumberNode(1))) + Test.record( + fake, Test.Error( + :test_interrupted, test, nothing, + [("skipped", [])], LineNumberNode(1) + ) + ) Test.push_testset(fake) Test.record(o_ts, fake) Test.pop_testset() diff --git a/test/setup.jl b/test/setup.jl index 536fa2f0..0d580c52 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -6,11 +6,11 @@ using IOCapture # Include it directly. const KATestSuite = let mod = @eval module $(gensym()) - using ..Test - import KernelAbstractions - kernelabstractions = pathof(KernelAbstractions) - kernelabstractions_root = dirname(dirname(kernelabstractions)) - include(joinpath(kernelabstractions_root, "test", "testsuite.jl")) + using ..Test + import KernelAbstractions + kernelabstractions = pathof(KernelAbstractions) + kernelabstractions_root = dirname(dirname(kernelabstractions)) + include(joinpath(kernelabstractions_root, "test", "testsuite.jl")) end mod.Testsuite end @@ -19,11 +19,11 @@ end # Include it directly. const GPUArraysTestSuite = let mod = @eval module $(gensym()) - using ..Test - import GPUArrays - gpuarrays = pathof(GPUArrays) - gpuarrays_root = dirname(dirname(gpuarrays)) - include(joinpath(gpuarrays_root, "test", "testsuite.jl")) + using ..Test + import GPUArrays + gpuarrays = pathof(GPUArrays) + gpuarrays_root = dirname(dirname(gpuarrays)) + include(joinpath(gpuarrays_root, "test", "testsuite.jl")) end mod.TestSuite end @@ -31,10 +31,12 @@ testf(f, xs...; kwargs...) = GPUArraysTestSuite.compare(f, CLArray, xs...; kwarg const device_eltypes = Dict() function GPUArraysTestSuite.supported_eltypes(::Type{<:CLArray}) - get!(device_eltypes, cl.device()) do - types = [Int16, Int32, Int64, - Complex{Int16}, Complex{Int32}, Complex{Int64}, - Float32, ComplexF32] + return get!(device_eltypes, cl.device()) do + types = [ + Int16, Int32, Int64, + Complex{Int16}, Complex{Int32}, Complex{Int64}, + Float32, ComplexF32, + ] if "cl_khr_fp64" in cl.device().extensions push!(types, Float64) push!(types, ComplexF64) @@ -60,7 +62,7 @@ function runtests(f, name, platform_filter) if isempty(targets) for platform in cl.platforms(), - device in cl.devices(platform) + device in cl.devices(platform) if platform_filter !== nothing # filter on the name or vendor names = lowercase.([platform.name, platform.vendor]) @@ -79,7 +81,7 @@ function runtests(f, name, platform_filter) end end - try + return try # generate a temporary module to execute the tests in mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_')) mod = @eval(Main, module $mod_name end) @@ -91,7 +93,7 @@ function runtests(f, name, platform_filter) # some tests require native execution capabilities requires_il = name in ["execution", "kernelabstractions"] || - startswith(name, "gpuarrays/") + startswith(name, "gpuarrays/") ex = quote GC.gc(true) @@ -116,19 +118,21 @@ function runtests(f, name, platform_filter) cpu_rss = Sys.maxrss() if VERSION >= v"1.11.0-DEV.1529" tc = Test.get_test_counts(data[1]) - passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken = + passes, fails, error, broken, c_passes, c_fails, c_errors, c_broken = tc.passes, tc.fails, tc.errors, tc.broken, tc.cumulative_passes, tc.cumulative_fails, tc.cumulative_errors, tc.cumulative_broken else - passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken = + passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(data[1]) end if data[1].anynonpass == false - data = ((passes+c_passes,broken+c_broken), - data[2], - data[3], - data[4], - data[5]) + data = ( + (passes + c_passes, broken + c_broken), + data[2], + data[3], + data[4], + data[5], + ) end res = vcat(collect(data), cpu_rss)