diff --git a/.github/workflows/CI_CUDA.yml b/.github/workflows/CI_CUDA.yml new file mode 100644 index 0000000..a71e485 --- /dev/null +++ b/.github/workflows/CI_CUDA.yml @@ -0,0 +1,46 @@ +name: CI_CUDA +on: + push: + branches: + - main + tags: ['*'] + pull_request: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - self-hosted - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: [self-hosted, linux, cuda] + strategy: + fail-fast: false + matrix: + version: + - '1.12' + - '1.11' + - '1.10' + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + # - uses: julia-actions/julia-runtest@v1 + - name: test-cuda + run: | + julia --project=@. -e 'using Pkg; Pkg.test("FiniteElementContainers"; coverage=true, julia_args=["--check-bounds=yes", "--compiled-modules=yes", "--depwarn=yes"], force_latest_compatible_version=false, allow_reresolve=true)' + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + files: lcov.info + #plugins: noop + #disable_search: true + fail_ci_if_error: true + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/README.md b/README.md index b32affd..cd4863e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://cthonios.github.io/FiniteElementContainers.jl/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://cthonios.github.io/FiniteElementContainers.jl/dev/) [![Build Status](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI.yml?query=branch%3Amain) +[![CUDA](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI_CUDA.yml/badge.svg?branch=main)](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI_CUDA.yml?query=branch%3Amain) [![ROCm](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI_ROCM.yml/badge.svg?branch=main)](https://github.com/Cthonios/FiniteElementContainers.jl/actions/workflows/CI_ROCM.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/Cthonios/FiniteElementContainers.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Cthonios/FiniteElementContainers.jl) diff --git a/src/Parameters.jl b/src/Parameters.jl index 6a3b2e5..76fc7ba 100644 --- a/src/Parameters.jl +++ b/src/Parameters.jl @@ -231,7 +231,7 @@ function update_bc_values!(p::Parameters) return nothing end -function update_dofs!(asm::SparseMatrixAssembler, p::Parameters) +function update_dofs!(asm::AbstractAssembler, p::Parameters) update_dofs!(asm, p.dirichlet_bcs) return nothing end diff --git a/src/assemblers/Assemblers.jl b/src/assemblers/Assemblers.jl index 15744f3..9528edf 100644 --- a/src/assemblers/Assemblers.jl +++ b/src/assemblers/Assemblers.jl @@ -8,67 +8,6 @@ $(TYPEDSIGNATURES) """ KA.get_backend(asm::AbstractAssembler) = KA.get_backend(asm.dof) -function _adjust_matrix_action_entries_for_constraints!( - Av, constraint_storage, v, ::KA.CPU - # TODO do we need a penalty scale here as well? -) - @assert length(Av) == length(constraint_storage) - @assert length(v) == length(constraint_storage) - # modify Av => (I - G) * Av + Gv - # TODO is this the right thing to do? I think so... - for i in 1:length(constraint_storage) - @inbounds Av[i] = (1. - constraint_storage[i]) * Av[i] + constraint_storage[i] * v[i] - end - return nothing -end - -# COV_EXCL_START -KA.@kernel function _adjust_matrix_action_entries_for_constraints_kernel!( - Av, constraint_storage, v -) - I = KA.@index(Global) - # modify Av => (I - G) * Av + Gv - @inbounds Av[I] = (1. - constraint_storage[I]) * Av[I] + constraint_storage[I] * v[I] -end -# COV_EXCL_STOP - -function _adjust_matrix_action_entries_for_constraints!( - Av, constraint_storage, v, backend::KA.Backend -) - @assert length(Av) == length(constraint_storage) - @assert length(v) == length(constraint_storage) - kernel! = _adjust_matrix_action_entries_for_constraints_kernel!(backend) - kernel!(Av, constraint_storage, v, ndrange = length(Av)) - return nothing -end - -function _adjust_vector_entries_for_constraints!(b, constraint_storage, ::KA.CPU) - @assert length(b) == length(constraint_storage) - # modify b => (I - G) * b + (Gu - g) - # but Gu = g, so we don't need that here - # unless we want to modify this to support weakly - # enforced BCs later - for i in 1:length(constraint_storage) - @inbounds b[i] = (1. - constraint_storage[i]) * b[i] - end - return nothing -end - -# COV_EXCL_START -KA.@kernel function _adjust_vector_entries_for_constraints_kernel(b, constraint_storage) - I = KA.@index(Global) - # modify b => (I - G) * b + (Gu - g) - @inbounds b[I] = (1. - constraint_storage[I]) * b[I] -end -# COV_EXCL_STOP - -function _adjust_vector_entries_for_constraints!(b, constraint_storage, backend::KA.Backend) - @assert length(b) == length(constraint_storage) - kernel! = _adjust_vector_entries_for_constraints_kernel(backend) - kernel!(b, constraint_storage, ndrange = length(b)) - return nothing -end - """ $(TYPEDSIGNATURES) Assembly method for an H1Field, e.g. internal force @@ -243,11 +182,13 @@ function stiffness(assembler::AbstractAssembler) end # some utilities +include("Constraints.jl") include("SparsityPatterns.jl") # types include("MatrixFreeAssembler.jl") include("SparseMatrixAssembler.jl") +# include("SparseMatrixAssemblerNew.jl") # methods include("Matrix.jl") diff --git a/src/assemblers/Constraints.jl b/src/assemblers/Constraints.jl new file mode 100644 index 0000000..43d88ae --- /dev/null +++ b/src/assemblers/Constraints.jl @@ -0,0 +1,137 @@ +# TODO this only work on CPU right now +function _adjust_matrix_entries_for_constraints!( + A::SparseMatrixCSC, constraint_storage, ::KA.CPU; + penalty_scale = 1.e6 +) + # first ensure things are the right size + @assert size(A, 1) == size(A, 2) + @assert length(constraint_storage) == size(A, 2) + + # hacky for now + # need a penalty otherwise we get into trouble with + # iterative linear solvers even for a simple poisson problem + # TODO perhaps this should be optional somehow + penalty = penalty_scale * tr(A) / size(A, 2) + + # now modify A => (I - G) * A + G + nz = nonzeros(A) + rowval = rowvals(A) + for j in 1:size(A, 2) + col_start = A.colptr[j] + col_end = A.colptr[j + 1] - 1 + for k in col_start:col_end + # for (I - G) * A term + nz[k] = (1. - constraint_storage[j]) * nz[k] + + # for + G term + if rowval[k] == j + @inbounds nz[k] = nz[k] + penalty * constraint_storage[j] + end + end + end + + return nothing +end + +# COV_EXCL_START +KA.@kernel function _adjust_matrix_entries_for_constraints_kernel!( + A, constraint_storage, trA; + penalty_scale = 1.e6 +) + J = KA.@index(Global) + + penalty = penalty_scale * trA / size(A, 2) + + # now modify A => (I - G) * A + G + nz = nonzeros(A) + rowval = rowvals(A) + + col_start = A.colptr[J] + col_end = A.colptr[J + 1] - 1 + for k in col_start:col_end + # for (I - G) * A term + nz[k] = (1. - constraint_storage[J]) * nz[k] + + # for + G term + if rowval[k] == J + @inbounds nz[k] = nz[k] + penalty * constraint_storage[J] + end + end +end +# COV_EXCL_STOP + +function _adjust_matrix_entries_for_constraints( + A, constraint_storage, backend::KA.Backend +) + # first ensure things are the right size + @assert size(A, 1) == size(A, 2) + @assert length(constraint_storage) == size(A, 2) + + # get trA ahead of time to save some allocations at kernel level + trA = tr(A) + + kernel! = _adjust_matrix_entries_for_constraints_kernel!(backend) + kernel!(A, constraint_storage, trA, ndrange = size(A, 2)) + return nothing +end + +function _adjust_matrix_action_entries_for_constraints!( + Av, constraint_storage, v, ::KA.CPU + # TODO do we need a penalty scale here as well? + ) + @assert length(Av) == length(constraint_storage) + @assert length(v) == length(constraint_storage) + # modify Av => (I - G) * Av + Gv + # TODO is this the right thing to do? I think so... + for i in 1:length(constraint_storage) + @inbounds Av[i] = (1. - constraint_storage[i]) * Av[i] + constraint_storage[i] * v[i] + end + return nothing +end + +# COV_EXCL_START +KA.@kernel function _adjust_matrix_action_entries_for_constraints_kernel!( + Av, constraint_storage, v +) + I = KA.@index(Global) + # modify Av => (I - G) * Av + Gv + @inbounds Av[I] = (1. - constraint_storage[I]) * Av[I] + constraint_storage[I] * v[I] +end +# COV_EXCL_STOP + +function _adjust_matrix_action_entries_for_constraints!( + Av, constraint_storage, v, backend::KA.Backend +) + @assert length(Av) == length(constraint_storage) + @assert length(v) == length(constraint_storage) + kernel! = _adjust_matrix_action_entries_for_constraints_kernel!(backend) + kernel!(Av, constraint_storage, v, ndrange = length(Av)) + return nothing +end + +function _adjust_vector_entries_for_constraints!(b, constraint_storage, ::KA.CPU) + @assert length(b) == length(constraint_storage) + # modify b => (I - G) * b + (Gu - g) + # but Gu = g, so we don't need that here + # unless we want to modify this to support weakly + # enforced BCs later + for i in 1:length(constraint_storage) + @inbounds b[i] = (1. - constraint_storage[i]) * b[i] + end + return nothing +end + +# COV_EXCL_START +KA.@kernel function _adjust_vector_entries_for_constraints_kernel(b, constraint_storage) + I = KA.@index(Global) + # modify b => (I - G) * b + (Gu - g) + @inbounds b[I] = (1. - constraint_storage[I]) * b[I] +end +# COV_EXCL_STOP + +function _adjust_vector_entries_for_constraints!(b, constraint_storage, backend::KA.Backend) + @assert length(b) == length(constraint_storage) + kernel! = _adjust_vector_entries_for_constraints_kernel(backend) + kernel!(b, constraint_storage, ndrange = length(b)) + return nothing +end diff --git a/src/assemblers/SparseMatrixAssembler.jl b/src/assemblers/SparseMatrixAssembler.jl index 5c20bdf..650401d 100644 --- a/src/assemblers/SparseMatrixAssembler.jl +++ b/src/assemblers/SparseMatrixAssembler.jl @@ -108,83 +108,6 @@ function Base.show(io::IO, asm::SparseMatrixAssembler) println(io, " ", asm.dof) end -# TODO this only work on CPU right now -function _adjust_matrix_entries_for_constraints!( - A::SparseMatrixCSC, constraint_storage, ::KA.CPU; - penalty_scale = 1.e6 -) - # first ensure things are the right size - @assert size(A, 1) == size(A, 2) - @assert length(constraint_storage) == size(A, 2) - - # hacky for now - # need a penalty otherwise we get into trouble with - # iterative linear solvers even for a simple poisson problem - # TODO perhaps this should be optional somehow - penalty = penalty_scale * tr(A) / size(A, 2) - - # now modify A => (I - G) * A + G - nz = nonzeros(A) - rowval = rowvals(A) - for j in 1:size(A, 2) - col_start = A.colptr[j] - col_end = A.colptr[j + 1] - 1 - for k in col_start:col_end - # for (I - G) * A term - nz[k] = (1. - constraint_storage[j]) * nz[k] - - # for + G term - if rowval[k] == j - @inbounds nz[k] = nz[k] + penalty * constraint_storage[j] - end - end - end - - return nothing -end - -# COV_EXCL_START -KA.@kernel function _adjust_matrix_entries_for_constraints_kernel!( - A, constraint_storage, trA; - penalty_scale = 1.e6 -) - J = KA.@index(Global) - - penalty = penalty_scale * trA / size(A, 2) - - # now modify A => (I - G) * A + G - nz = nonzeros(A) - rowval = rowvals(A) - - col_start = A.colptr[J] - col_end = A.colptr[J + 1] - 1 - for k in col_start:col_end - # for (I - G) * A term - nz[k] = (1. - constraint_storage[J]) * nz[k] - - # for + G term - if rowval[k] == J - @inbounds nz[k] = nz[k] + penalty * constraint_storage[J] - end - end -end -# COV_EXCL_STOP - -function _adjust_matrix_entries_for_constraints( - A, constraint_storage, backend::KA.Backend -) - # first ensure things are the right size - @assert size(A, 1) == size(A, 2) - @assert length(constraint_storage) == size(A, 2) - - # get trA ahead of time to save some allocations at kernel level - trA = tr(A) - - kernel! = _adjust_matrix_entries_for_constraints_kernel!(backend) - kernel!(A, constraint_storage, trA, ndrange = size(A, 2)) - return nothing -end - function _hessian(assembler::SparseMatrixAssembler, ::KA.CPU) H = SparseArrays.sparse!(assembler.matrix_pattern, assembler.hessian_storage) @@ -223,7 +146,7 @@ end # the residual and stiffness appropriately without having to reshape, Is, Js, etc. # when we want to change BCs which is slow -function update_dofs!(assembler::SparseMatrixAssembler, dirichlet_bcs) +function update_dofs!(assembler::AbstractAssembler, dirichlet_bcs::DirichletBCs) use_condensed = _is_condensed(assembler.dof) if length(dirichlet_bcs) > 0 @@ -249,7 +172,7 @@ end # the residual and stiffness appropriately without having to reshape, Is, Js, etc. # when we want to change BCs which is slow -function _update_dofs_condensed!(assembler::SparseMatrixAssembler) +function _update_dofs_condensed!(assembler::AbstractAssembler) assembler.constraint_storage[assembler.dof.unknown_dofs] .= 0. assembler.constraint_storage[assembler.dof.dirichlet_dofs] .= 1. return nothing diff --git a/src/assemblers/SparseMatrixAssemblerNew.jl b/src/assemblers/SparseMatrixAssemblerNew.jl index 1b7f660..9ebbc5c 100644 --- a/src/assemblers/SparseMatrixAssemblerNew.jl +++ b/src/assemblers/SparseMatrixAssemblerNew.jl @@ -1,4 +1,4 @@ -struct SparseMatrixAssembler{ +struct SparseMatrixAssemblerNew{ Condensed, IV <: AbstractArray{Int, 1}, RV <: AbstractArray{Float64, 1}, @@ -6,6 +6,121 @@ struct SparseMatrixAssembler{ } <: AbstractAssembler{DofManager{Condensed, Int, IV, Var}} constraint_storage::RV dof::DofManager{Condensed, Int, IV, Var} - matrix_pattern::SparseMatrixPattern{IV, BlockPattern, RV} + matrix_pattern::SparseMatrixPattern{IV, RV} vector_pattern::SparseVectorPattern{IV} -end \ No newline at end of file +end + +function SparseMatrixAssemblerNew(dof::DofManager) + matrix_pattern = SparseMatrixPattern(dof) + vector_pattern = SparseVectorPattern(dof) + + ND, NN = size(dof) + n_total_dofs = ND * NN + constraint_storage = zeros(n_total_dofs) + constraint_storage[dof.dirichlet_dofs] .= 1. + + return SparseMatrixAssemblerNew( + constraint_storage, dof, + matrix_pattern, vector_pattern + ) +end + +function SparseMatrixAssemblerNew(var::AbstractFunction; use_condensed::Bool = false) + dof = DofManager(var; use_condensed=use_condensed) + return SparseMatrixAssemblerNew(dof) +end + +function Adapt.adapt_structure(to, asm::SparseMatrixAssemblerNew) + return SparseMatrixAssemblerNew( + adapt(to, asm.constraint_storage), + adapt(to, asm.dof), + adapt(to, asm.matrix_pattern), + adapt(to, asm.vector_pattern) + ) +end + +function Base.show(io::IO, asm::SparseMatrixAssemblerNew) + println(io, "SparseMatrixAssembler") + println(io, " ", asm.dof) +end + +function create_block_quadrature_cache(asm::SparseMatrixAssemblerNew, ::Type = Float64) + T = eltype(asm.constraint_storage) + backend = KA.get_backend(asm) + fspace = function_space(asm.dof) + cache = Matrix{T}[] + for (key, val) in pairs(fspace.ref_fes) + NQ = ReferenceFiniteElements.num_quadrature_points(val) + NE = size(getfield(fspace.elem_conns, key), 2) + field = zeros(T, NQ, NE) + push!(cache, field) + end + cache = NamedTuple{keys(fspace.ref_fes)}(tuple(cache...)) + return adapt(backend, cache) +end + +function create_sparse_matrix_cache(asm::SparseMatrixAssemblerNew) + T = eltype(asm.constraint_storage) + backend = KA.get_backend(asm) + return KA.zeros(T, backend, num_entries(asm.matrix_pattern)) +end + +function create_sparse_vector_cache(asm::SparseMatrixAssemblerNew) + T = eltype(asm.constraint_storage) + backend = KA.get_backend(asm) + return KA.zeros(T, backend, num_entries(asm.vector_pattern)) +end + +function assemble_scalar_new!( + dof, + func, Uu, p +) + fspace = function_space(dof) + t = current_time(p.times) + Δt = time_step(p.times) + _update_for_assembly!(p, dof, Uu) + for ( + conns, + block_physics, ref_fe, + state_old, state_new, props + ) in zip( + values(fspace.elem_conns), + values(p.physics), values(fspace.ref_fes), + values(p.state_old), values(p.state_new), + values(p.properties) + ) + _assemble_block_scalar!( + conns, + func, + block_physics, ref_fe, + p.h1_coords, t, Δt, + p.h1_field, p.h1_field_old, + state_old, state_new, props + ) + end +end + +function _assemble_block_scalar!( + conns, func, + physics, ref_fe, X, t, Δt, U, U_old, state_old, state_new, props +) + mapreduce(+, axes(conns, 2)) do e + x_el = _element_level_fields_flat(X, ref_fe, conns, e) + u_el = _element_level_fields_flat(U, ref_fe, conns, e) + u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, e) + props_el = _element_level_properties(props, e) + w = zero(eltype(X)) + for q in 1:num_quadrature_points(ref_fe) + # mapreduce(+, 1:num_quadrature_points(ref_fe)) do q + interps = _cell_interpolants(ref_fe, q) + state_old_q = _quadrature_level_state(state_old, q, e) + state_new_q = _quadrature_level_state(state_new, q, e) + w += func(physics, interps, x_el, t, Δt, u_el, u_el_old, state_old_q, state_new_q, props_el) + end + w + end +end + +function _update_dofs!(assembler::SparseMatrixAssemblerNew, dirichlet_dofs::T) where T <: AbstractArray{<:Integer, 1} + _update_dofs!(assembler.matrix_pattern, assembler.dof, dirichlet_dofs) +end diff --git a/src/assemblers/SparsityPatterns.jl b/src/assemblers/SparsityPatterns.jl index 8fdcf4a..cf71a23 100644 --- a/src/assemblers/SparsityPatterns.jl +++ b/src/assemblers/SparsityPatterns.jl @@ -264,3 +264,4 @@ function Adapt.adapt_structure(to, pattern::SparseVectorPattern) ) end +num_entries(pattern::SparseVectorPattern) = length(pattern.Is)