Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CI_CUDA.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
# - uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
# - uses: julia-actions/julia-runtest@v1
- name: test-cuda
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/CI_ROCM.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
# - uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
# - uses: julia-actions/julia-runtest@v1
- name: test-rocm
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "FiniteElementContainers"
uuid = "d08262e4-672f-4e7f-a976-f2cea5767631"
version = "0.10.0"
version = "0.10.1"
authors = ["Craig M. Hamel <[email protected]> and contributors"]

[deps]
Expand Down
12 changes: 9 additions & 3 deletions ext/FiniteElementContainersAMDGPUExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ function AMDGPU.rocSPARSE.ROCSparseMatrixCSC(asm::SparseMatrixAssembler)
end

function FiniteElementContainers._stiffness(asm::SparseMatrixAssembler, ::ROCBackend)
stiffness = AMDGPU.rocSPARSE.ROCSparseMatrixCSC(asm)
return stiffness
K = AMDGPU.rocSPARSE.ROCSparseMatrixCSC(asm)

if FiniteElementContainers._is_condensed(asm.dof)
FiniteElementContainers._adjust_matrix_entries_for_constraints!(
K, asm.constraint_storage, get_backend(asm)
)
end
return K
end

end # module
end # module
8 changes: 7 additions & 1 deletion ext/FiniteElementContainersCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ function CUDA.CUSPARSE.CuSparseMatrixCSC(asm::SparseMatrixAssembler)
end

function FiniteElementContainers._stiffness(asm::SparseMatrixAssembler, ::CUDABackend)
return CUDA.CUSPARSE.CuSparseMatrixCSC(asm)
K = CUDA.CUSPARSE.CuSparseMatrixCSC(asm)
if FiniteElementContainers._is_condensed(asm.dof)
FiniteElementContainers._adjust_matrix_entries_for_constraints!(
K, asm.constraint_storage, get_backend(asm)
)
end
return K
end

# this one isn't quite right
Expand Down
49 changes: 35 additions & 14 deletions src/assemblers/Constraints.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
# util for sparse matrices on gpus
# TODO make non-allocating through some mapreduce functionality
# COV_EXCL_START
KA.@kernel function __sptrace_kernel!(diagA, colptr, rowvals, nz)
J = KA.@index(Global)
col_start = colptr[J]
col_end = colptr[J + 1] - 1
for k in col_start:col_end
if rowvals[k] == J
diagA[J] = nz[k]
end
end
end
# COV_EXCL_STOP

function __sptrace(A)
backend = KA.get_backend(A)
diagA = KA.zeros(backend, eltype(A), size(A, 1))
kernel! = __sptrace_kernel!(backend)
kernel!(diagA, A.colPtr, A.rowVal, A.nzVal, ndrange = size(A, 2))
return sum(diagA)
end

# TODO this only work on CPU right now
function _adjust_matrix_entries_for_constraints!(
A::SparseMatrixCSC, constraint_storage, ::KA.CPU;
Expand Down Expand Up @@ -35,43 +58,41 @@ end

# COV_EXCL_START
KA.@kernel function _adjust_matrix_entries_for_constraints_kernel!(
A, constraint_storage, trA;
penalty_scale = 1.e6
colptr, rowvals, nz, size_A_2,
constraint_storage, trA, penalty_scale
)
J = KA.@index(Global)

penalty = penalty_scale * trA / size(A, 2)
penalty = penalty_scale * trA / size_A_2

# now modify A => (I - G) * A + G
nz = nonzeros(A)
rowval = rowvals(A)

col_start = A.colptr[J]
col_end = A.colptr[J + 1] - 1
col_start = colptr[J]
col_end = colptr[J + 1] - 1
for k in col_start:col_end
# for (I - G) * A term
nz[k] = (1. - constraint_storage[J]) * nz[k]

# for + G term
if rowval[k] == J
if rowvals[k] == J
@inbounds nz[k] = nz[k] + penalty * constraint_storage[J]
end
end
end
# COV_EXCL_STOP

function _adjust_matrix_entries_for_constraints(
A, constraint_storage, backend::KA.Backend
function _adjust_matrix_entries_for_constraints!(
A, constraint_storage, backend::KA.Backend;
penalty_scale = 1.e6
)
# first ensure things are the right size
@assert size(A, 1) == size(A, 2)
@assert length(constraint_storage) == size(A, 2)

# get trA ahead of time to save some allocations at kernel level
trA = tr(A)
trA = __sptrace(A)

kernel! = _adjust_matrix_entries_for_constraints_kernel!(backend)
kernel!(A, constraint_storage, trA, ndrange = size(A, 2))
kernel!(A.colPtr, A.rowVal, A.nzVal, size(A, 2), constraint_storage, trA, penalty_scale, ndrange = size(A, 2))
return nothing
end

Expand Down
17 changes: 17 additions & 0 deletions test/TestAssemblers.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
function test_sparse_matrix_gpu_trace()
A = SparseMatrixCSC(rand(10, 10))
trA_check = tr(A)

if AMDGPU.functional()
A_gpu = adapt(ROCArray, A)
elseif CUDA.functional()
A_gpu = adapt(CuArray, A)
else
return nothing
end

trA = FiniteElementContainers.__sptrace(A_gpu)
@test trA ≈ trA_check
end

function test_sparse_matrix_assembler()
# create very simple poisson problem
mesh_file = "./poisson/poisson.g"
Expand Down Expand Up @@ -51,5 +67,6 @@ function test_sparse_matrix_assembler()
end

@testset "Sparse matrix assembler" begin
test_sparse_matrix_gpu_trace()
test_sparse_matrix_assembler()
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ using ForwardDiff
using LinearAlgebra
using MPI
using ReferenceFiniteElements
using SparseArrays
using StaticArrays
using Tensors
using Test
Expand Down
Loading