diff --git a/src/assemblers/Assemblers.jl b/src/assemblers/Assemblers.jl index 9528edf..a6c8977 100644 --- a/src/assemblers/Assemblers.jl +++ b/src/assemblers/Assemblers.jl @@ -8,24 +8,98 @@ $(TYPEDSIGNATURES) """ KA.get_backend(asm::AbstractAssembler) = KA.get_backend(asm.dof) +abstract type AssembledReturnType end + +struct AssembledMatrix <: AssembledReturnType +end + +struct AssembledScalar <: AssembledReturnType +end + +struct AssembledStruct{T} <: AssembledReturnType +end + +struct AssembledVector <: AssembledReturnType +end + +# fall back method does nothing +@inline function _accumulate_q_value(::AssembledReturnType, storage, val_q, val_e, q, e) + val_e = val_e + val_q + return val_e +end + +@inline function _accumulate_q_value(::AssembledScalar, storage, val_q, val_e, q, e) + storage[q, e] = val_q + return nothing +end + +@inline function _accumulate_q_value(::AssembledStruct, storage, val_q, val_e, q, e) + storage[q, e] = val_q + return nothing +end + +# assembly helper methods below +function _assemble_element!( + storage, R_el::SVector, + conns, # all connectivities for this block + el_id::Int, ::Int, ::Int +) + n_dofs = size(storage, 1) + for d in axes(storage, 1) + for n in axes(conns, 1) + global_id = n_dofs * (conns[n, el_id] - 1) + d + local_id = n_dofs * (n - 1) + d + Atomix.@atomic storage.data[global_id] += R_el[local_id] + end + end + return nothing +end + +# has a pretty dumb name for what it does +function _assemble_element!( + storage, val_q::T, + conns, # all connectivities for this block + ::Int, ::Int, ::Int +) where T <: Number + return nothing +end + +# TODO we'll need a regular matrix implementation +# as well (Can we live with 1?) """ $(TYPEDSIGNATURES) -Assembly method for an H1Field, e.g. internal force -Called on a single element e for a given block b where local_val -has already been constructed from quadrature contributions. -""" -function _assemble_element!(global_val::H1Field, local_val, conn) - n_dofs = size(global_val, 1) - for i in axes(conn, 1) - for d in 1:n_dofs - global_id = n_dofs * (conn[i] - 1) + d - local_id = n_dofs * (i - 1) + d - global_val[global_id] += local_val[local_id] - end +Specialization of of ```_assemble_element!``` for ```SparseMatrixAssembler```. +""" +function _assemble_element!( + storage, K_el::SMatrix, + conns, # all connectivities for this block + el_id::Int, + block_start_index::Int, block_el_level_size::Int +) + # figure out ids needed to update + start_id = block_start_index + + (el_id - 1) * block_el_level_size + end_id = start_id + block_el_level_size - 1 + ids = start_id:end_id + + # get appropriate storage and update values + # @views storage[ids] += K_el[:] + for (i, id) in enumerate(ids) + storage[id] += K_el.data[i] end return nothing end +# has a pretty dumb name for what it does +# fall back fro struct +function _assemble_element!( + storage, val_q, + conns, # all connectivities for this block + ::Int, ::Int, ::Int +) + return nothing +end + """ $(TYPEDSIGNATURES) """ @@ -102,6 +176,43 @@ $(TYPEDSIGNATURES) return R_el end +""" +$(TYPEDSIGNATURES) +""" +@inline function _element_scratch(::AssembledMatrix, ref_fe, U) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + K_el = zeros(SMatrix{NxNDof, NxNDof, eltype(U), NxNDof * NxNDof}) + return K_el +end + +""" +$(TYPEDSIGNATURES) +""" +@inline function _element_scratch(::AssembledScalar, ref_fe, U) + s_el = zero(eltype(U)) + return s_el +end + +""" +$(TYPEDSIGNATURES) +""" +@inline function _element_scratch(::AssembledStruct, ref_fe, U) + return nothing +end + +""" +$(TYPEDSIGNATURES) +""" +@inline function _element_scratch(::AssembledVector, ref_fe, U) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + R_el = zeros(SVector{NxNDof, eltype(U)}) + return R_el +end + """ $(TYPEDSIGNATURES) """ @@ -181,6 +292,143 @@ function stiffness(assembler::AbstractAssembler) return _stiffness(assembler, KA.get_backend(assembler)) end +# General assembler methods + +# CPU implementation +""" +$(TYPEDSIGNATURES) +Assembly method for a block labelled as block_id. This is a CPU implementation +with no threading. + +TODO add state variables and physics properties +""" +function _assemble_block!( + ::KA.CPU, + # field::AbstractArray{<:Number, 1}, + field, + conns::Connectivity, block_start_index::Int, block_el_level_size::Int, + func::Function, + physics::AbstractPhysics, ref_fe::ReferenceFE, + X::AbstractField, t::T, dt::T, + U::Solution, U_old::Solution, + state_old::S, state_new::S, props::AbstractArray, + return_type::R +) where { + T <: Number, + Solution <: AbstractField, + S, #<: L2QuadratureField + R <: AssembledReturnType +} + + for e in axes(conns, 2) + x_el = _element_level_fields_flat(X, ref_fe, conns, e) + u_el = _element_level_fields_flat(U, ref_fe, conns, e) + u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, e) + props_el = _element_level_properties(props, e) + val_el = _element_scratch(return_type, ref_fe, U) + + for q in 1:num_quadrature_points(ref_fe) + interps = _cell_interpolants(ref_fe, q) + state_old_q = _quadrature_level_state(state_old, q, e) + state_new_q = _quadrature_level_state(state_new, q, e) + val_q = func(physics, interps, x_el, t, dt, u_el, u_el_old, state_old_q, state_new_q, props_el) + val_el = _accumulate_q_value(return_type, field, val_q, val_el, q, e) + end + _assemble_element!(field, val_el, conns, e, block_start_index, block_el_level_size) + end + return nothing +end + +# GPU implementation +# COV_EXCL_START +KA.@kernel function _assemble_block_kernel!( + # field::AbstractArray{<:Number, 1}, + field, + conns::Connectivity, block_start_index::Int, block_el_level_size::Int, + func::Function, + physics::AbstractPhysics, ref_fe::ReferenceFE, + X::AbstractField, t::T, dt::T, + U::Solution, U_old::Solution, + state_old::S, state_new::S, props::AbstractArray, + return_type::R +) where { + T <: Number, + Solution <: AbstractField, + S, #<: L2QuadratureField + R <: AssembledReturnType +} + E = KA.@index(Global) + + x_el = _element_level_fields_flat(X, ref_fe, conns, E) + u_el = _element_level_fields_flat(U, ref_fe, conns, E) + u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, E) + props_el = _element_level_properties(props, E) + val_el = _element_scratch(return_type, ref_fe, U) + for q in 1:num_quadrature_points(ref_fe) + interps = _cell_interpolants(ref_fe, q) + state_old_q = _quadrature_level_state(state_old, q, E) + state_new_q = _quadrature_level_state(state_new, q, E) + val_q = func(physics, interps, x_el, t, dt, u_el, u_el_old, state_old_q, state_new_q, props_el) + val_el = _accumulate_q_value(return_type, field, val_q, val_el, q, E) + end + + _assemble_element!(field, val_el, conns, E, block_start_index, block_el_level_size) +end +# COV_EXCL_STOP + +# method for kernel generation +function _assemble_block!( + backend::KA.Backend, + field, + conns, block_start_index, block_el_level_size, + func, + physics, ref_fe, + X, t, dt, U, U_old, state_old, state_new, props, + return_type +) + kernel! = _assemble_block_kernel!(backend) + kernel!( + field, + conns, block_start_index, block_el_level_size, + func, + physics, ref_fe, + X, t, dt, + U, U_old, + state_old, state_new, props, + return_type, + ndrange=size(conns, 2) + ) + return nothing +end + +function create_assembler_cache( + asm::AbstractAssembler, + ::AssembledMatrix +) + return zeros(length(asm.matrix_pattern.Is)) +end + +function create_assembler_cache( + asm::AbstractAssembler, + ::AssembledScalar +) + vals = L2ElementField[] + fspace = function_space(asm.dof) + for (conn, ref_fe) in zip(values(fspace.elem_conns), values(fspace.ref_fes)) + NQ = ReferenceFiniteElements.num_quadrature_points(ref_fe) + NE = size(conn, 2) + push!(vals, L2ElementField(zeros(Float64, NQ, NE))) + end + return NamedTuple{keys(fspace.elem_conns)}(tuple(vals...)) +end + +function create_assembler_cache( + asm::AbstractAssembler, + ::AssembledVector +) + return create_field(asm) +end + # some utilities include("Constraints.jl") include("SparsityPatterns.jl") diff --git a/src/assemblers/Matrix.jl b/src/assemblers/Matrix.jl index 3dfd5e8..03aaf15 100644 --- a/src/assemblers/Matrix.jl +++ b/src/assemblers/Matrix.jl @@ -25,10 +25,12 @@ function assemble_matrix!( storage, pattern, dof, func::F, Uu, p ) where F <: Function fill!(storage, zero(eltype(storage))) + backend = KA.get_backend(storage) fspace = function_space(dof) t = current_time(p.times) dt = time_step(p.times) _update_for_assembly!(p, dof, Uu) + return_type = AssembledMatrix() for ( conns, block_start_index, block_el_level_size, block_physics, ref_fe, @@ -41,9 +43,7 @@ function assemble_matrix!( values(p.state_old), values(p.state_new), values(p.properties), ) - # TODO re-enable back-end checking - backend = KA.get_backend(p.h1_field) - _assemble_block_matrix!( + _assemble_block!( backend, storage, conns, block_start_index, block_el_level_size, @@ -51,130 +51,8 @@ function assemble_matrix!( block_physics, ref_fe, p.h1_coords, t, dt, p.h1_field, p.h1_field_old, - state_old, state_new, props + state_old, state_new, props, + return_type ) end end - -# CPU implementation - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a CPU implementation -with no threading. - -TODO add state variables and physics properties -""" -function _assemble_block_matrix!( - ::KA.CPU, - field::AbstractArray{<:Number, 1}, - conns::Connectivity, block_start_index::Int, block_el_level_size::Int, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, dt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - - for e in axes(conns, 2) - x_el = _element_level_fields_flat(X, ref_fe, conns, e) - u_el = _element_level_fields_flat(U, ref_fe, conns, e) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, e) - props_el = _element_level_properties(props, e) - K_el = _element_scratch_matrix(ref_fe, U) - - for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, e) - state_new_q = _quadrature_level_state(state_new, q, e) - K_q = func(physics, interps, x_el, t, dt, u_el, u_el_old, state_old_q, state_new_q, props_el) - K_el = K_el + K_q - end - _assemble_element!(field, K_el, e, block_start_index, block_el_level_size) - end - return nothing -end - - -# GPU implementation -# COV_EXCL_START -KA.@kernel function _assemble_block_matrix_kernel!( - field::AbstractArray{<:Number, 1}, - conns::Connectivity, block_start_index::Int, block_el_level_size::Int, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, dt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - E = KA.@index(Global) - - x_el = _element_level_fields_flat(X, ref_fe, conns, E) - u_el = _element_level_fields_flat(U, ref_fe, conns, E) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, E) - props_el = _element_level_properties(props, E) - K_el = _element_scratch_matrix(ref_fe, U) - for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, E) - state_new_q = _quadrature_level_state(state_new, q, E) - K_q = func(physics, interps, x_el, t, dt, u_el, u_el_old, state_old_q, state_new_q, props_el) - K_el = K_el + K_q - end - - # leaving here just in case - # block_start_index = values(pattern.block_start_indices)[block_id] - # block_el_level_size = values(pattern.block_el_level_sizes)[block_id] - # start_id = block_start_index + - # (E - 1) * block_el_level_size - # end_id = start_id + block_el_level_size - 1 - # ids = start_id:end_id - # for (i, id) in enumerate(ids) - # Atomix.@atomic field[id] += K_el.data[i] - # end - _assemble_element!(field, K_el, E, block_start_index, block_el_level_size) -end -# COV_EXCL_STOP - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a GPU agnostic implementation -using KernelAbstractions and Atomix for eliminating race conditions - -TODO add state variables and physics properties -""" -function _assemble_block_matrix!( - backend::KA.Backend, - field::AbstractArray{<:Number, 1}, - conns::Connectivity, block_start_index::Int, block_el_level_size::Int, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, dt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - kernel! = _assemble_block_matrix_kernel!(backend) - kernel!( - field, - conns, block_start_index, block_el_level_size, - func, - physics, ref_fe, - X, t, dt, - U, U_old, - state_old, state_new, props, - ndrange=size(conns, 2) - ) - return nothing -end diff --git a/src/assemblers/MatrixAction.jl b/src/assemblers/MatrixAction.jl index a75ce6c..a0882ff 100644 --- a/src/assemblers/MatrixAction.jl +++ b/src/assemblers/MatrixAction.jl @@ -19,10 +19,12 @@ function assemble_matrix_action!( storage, dof, func, Uu, Vu, p ) fill!(storage, zero(eltype(storage))) + backend = KA.get_backend(storage) fspace = function_space(dof) t = current_time(p.times) Δt = time_step(p.times) _update_for_assembly!(p, dof, Uu, Vu) + return_type = AssembledVector() for ( conns, block_physics, ref_fe, @@ -33,16 +35,16 @@ function assemble_matrix_action!( values(p.state_old), values(p.state_new), values(p.properties) ) - backend = KA.get_backend(p.h1_field) _assemble_block_matrix_action!( backend, storage, - conns, + conns, 0, 0, func, block_physics, ref_fe, p.h1_coords, t, Δt, p.h1_field, p.h1_field_old, p.h1_hvp_scratch_field, - state_old, state_new, props + state_old, state_new, props, + return_type ) end end @@ -60,12 +62,13 @@ TODO improve typing of fields to ensure they mathc up in terms of function function _assemble_block_matrix_action!( ::KA.CPU, field::AbstractField, - conns::Connectivity, + conns::Connectivity, b1, b2, func::Function, physics::AbstractPhysics, ref_fe::ReferenceFE, X::AbstractField, t::T, Δt::T, U::Solution, U_old::Solution, V::Solution, - state_old::S, state_new::S, props::AbstractArray + state_old::S, state_new::S, props::AbstractArray, + return_type ) where { T <: Number, Solution <: AbstractField, @@ -86,7 +89,9 @@ function _assemble_block_matrix_action!( K_el = K_el + K_q end Kv_el = K_el * v_el - @views _assemble_element!(field, Kv_el, conns[:, e]) + # @views _assemble_element!(field, Kv_el, conns[:, e]) + # zeros aren't need for this case of this method + @views _assemble_element!(field, Kv_el, conns, e, 0, 0) end end @@ -101,12 +106,13 @@ TODO mark const fields # COV_EXCL_START KA.@kernel function _assemble_block_matrix_action_kernel!( field::AbstractField, - conns::Connectivity, + conns::Connectivity, b1, b2, func::Function, physics::AbstractPhysics, ref_fe::ReferenceFE, X::AbstractField, t::T, Δt::T, U::Solution, U_old::Solution, V::Solution, - state_old::S, state_new::S, props::AbstractArray + state_old::S, state_new::S, props::AbstractArray, + return_type ) where { T <: Number, Solution <: AbstractField, @@ -130,14 +136,17 @@ KA.@kernel function _assemble_block_matrix_action_kernel!( Kv_el = K_el * v_el # now assemble atomically - n_dofs = size(field, 1) - for i in 1:size(conns, 1) - for d in 1:n_dofs - global_id = n_dofs * (conns[i, E] - 1) + d - local_id = n_dofs * (i - 1) + d - Atomix.@atomic field.vals[global_id] += Kv_el[local_id] - end - end + # n_dofs = size(field, 1) + # for i in 1:size(conns, 1) + # for d in 1:n_dofs + # global_id = n_dofs * (conns[i, E] - 1) + d + # local_id = n_dofs * (i - 1) + d + # Atomix.@atomic field.vals[global_id] += Kv_el[local_id] + # end + # end + # zeros are there since that method needs ints for + # matrix case + _assemble_element!(field, Kv_el, conns, E, 0, 0) end # COV_EXCL_STOP @@ -151,12 +160,13 @@ TODO add state variables and physics properties function _assemble_block_matrix_action!( backend::KA.Backend, field::AbstractField, - conns::Connectivity, + conns::Connectivity, b1, b2, func::Function, physics::AbstractPhysics, ref_fe::ReferenceFE, X::AbstractField, t::T, Δt::T, U::Solution, U_old::Solution, V::Solution, - state_old::S, state_new::S, props::AbstractArray + state_old::S, state_new::S, props::AbstractArray, + return_type ) where { T <: Number, Solution <: AbstractField, @@ -165,12 +175,13 @@ function _assemble_block_matrix_action!( kernel! = _assemble_block_matrix_action_kernel!(backend) kernel!( field, - conns, + conns, b1, b2, func, physics, ref_fe, X, t, Δt, U, U_old, V, state_old, state_new, props, t, Δt, + return_type, ndrange=size(conns, 2) ) return nothing diff --git a/src/assemblers/NeumannBC.jl b/src/assemblers/NeumannBC.jl index b813c09..8dd9b10 100644 --- a/src/assemblers/NeumannBC.jl +++ b/src/assemblers/NeumannBC.jl @@ -22,9 +22,9 @@ function assemble_vector_neumann_bc!( # do not zero! # TODO should below 2 methods calls be assumed to have # been conducted previously? + backend = KA.get_backend(p.h1_field) _update_for_assembly!(p, dof, Uu) for bc in values(p.neumann_bcs.bc_caches) - backend = KA.get_backend(bc) _assemble_block_vector_neumann_bc!( backend, storage, p.h1_field, p.h1_coords, @@ -62,7 +62,7 @@ function _assemble_block_vector_neumann_bc!( R_el = R_el + JxW * Nvec * f_val end - @views _assemble_element!(field, R_el, bc.surface_conns[:, e]) + @views _assemble_element!(field, R_el, bc.surface_conns, e, 0, 0) end end @@ -100,15 +100,7 @@ KA.@kernel function _assemble_block_vector_neumann_bc_kernel!( R_el = R_el + JxW * Nvec * f_val end - # now assemble atomically - n_dofs = size(field, 1) - for i in 1:size(bc.surface_conns, 1) - for d in 1:n_dofs - global_id = n_dofs * (bc.surface_conns[i, E] - 1) + d - local_id = n_dofs * (i - 1) + d - Atomix.@atomic field.data[global_id] += R_el[local_id] - end - end + _assemble_element!(field, R_el, bc.surface_conns, E, 0, 0) end # COV_EXCL_STOP diff --git a/src/assemblers/QuadratureQuantity.jl b/src/assemblers/QuadratureQuantity.jl index 8b1a3cd..fedc4cf 100644 --- a/src/assemblers/QuadratureQuantity.jl +++ b/src/assemblers/QuadratureQuantity.jl @@ -4,8 +4,11 @@ $(TYPEDSIGNATURES) function assemble_scalar!( assembler, func::F, Uu, p ) where F <: Function + # the nothing below is because + # there is no needed sparsity pattern assemble_quadrature_quantity!( - assembler.scalar_quadrature_storage, assembler.dof, + assembler.scalar_quadrature_storage, + nothing, assembler.dof, func, Uu, p ) end @@ -14,13 +17,15 @@ end $(TYPEDSIGNATURES) """ function assemble_quadrature_quantity!( - storage, dof, - func::F, Uu, p + storage, pattern, dof, + func::F, Uu, p, return_type = AssembledScalar() ) where F <: Function + backend = KA.get_backend(p.h1_field) fspace = function_space(dof) t = current_time(p.times) Δt = time_step(p.times) _update_for_assembly!(p, dof, Uu) + # return_type = AssembledScalar() for ( block_storage, conns, @@ -33,129 +38,16 @@ function assemble_quadrature_quantity!( values(p.state_old), values(p.state_new), values(p.properties) ) - backend = KA.get_backend(p.h1_field) - _assemble_block_quadrature_quantity!( + _assemble_block!( backend, block_storage, - conns, + conns, 0, 0, func, block_physics, ref_fe, p.h1_coords, t, Δt, p.h1_field, p.h1_field_old, - state_old, state_new, props + state_old, state_new, props, + return_type ) end end - -# CPU Implementation - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a CPU implementation -with no threading. - -TODO improve typing of fields to ensure they mathc up in terms of function - spaces -""" -function _assemble_block_quadrature_quantity!( - ::KA.CPU, - field::AbstractArray, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - for e in axes(conns, 2) - x_el = _element_level_fields_flat(X, ref_fe, conns, e) - u_el = _element_level_fields_flat(U, ref_fe, conns, e) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, e) - props_el = _element_level_properties(props, e) - - for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, e) - state_new_q = _quadrature_level_state(state_new, q, e) - e_q = func(physics, interps, x_el, t, Δt, u_el, u_el_old, state_old_q, state_new_q, props_el) - field[q, e] = e_q - end - end -end - -""" -$(TYPEDSIGNATURES) -Kernel for residual block assembly - -TODO mark const fields -""" -# COV_EXCL_START -KA.@kernel function _assemble_block_quadrature_quantity_kernel!( - field::AbstractArray, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - # Q, E = KA.@index(Global, NTuple) - E = KA.@index(Global) - x_el = _element_level_fields_flat(X, ref_fe, conns, E) - u_el = _element_level_fields_flat(U, ref_fe, conns, E) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, E) - props_el = _element_level_properties(props, E) - - KA.Extras.@unroll for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, E) - state_new_q = _quadrature_level_state(state_new, q, E) - e_q = func(physics, interps, x_el, t, Δt, u_el, u_el_old, state_old_q, state_new_q, props_el) - @inbounds field[q, E] = e_q - end -end -# COV_EXCL_STOP - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a GPU agnostic implementation -using KernelAbstractions and Atomix for eliminating race conditions - -TODO add state variables and physics properties -""" -function _assemble_block_quadrature_quantity!( - backend::KA.Backend, - field::AbstractArray, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - kernel! = _assemble_block_quadrature_quantity_kernel!(backend) - kernel!( - field, - conns, - func, - physics, ref_fe, - X, t, Δt, - U, U_old, - state_old, state_new, props, - ndrange=size(field, 2) - ) - # KA.synchronize(backend) - return nothing -end diff --git a/src/assemblers/SparsityPatterns.jl b/src/assemblers/SparsityPatterns.jl index aceaa93..1eb9e6c 100644 --- a/src/assemblers/SparsityPatterns.jl +++ b/src/assemblers/SparsityPatterns.jl @@ -7,14 +7,11 @@ case where you want to eliminate fixed-dofs or not. """ struct SparseMatrixPattern{ I <: AbstractArray{Int, 1}, - # B, R <: AbstractArray{Float64, 1} } Is::I Js::I unknown_dofs::I - # block_start_indices::B - # block_el_level_sizes::B block_start_indices::Vector{Int} block_el_level_sizes::Vector{Int} # cache arrays @@ -61,11 +58,6 @@ function SparseMatrixPattern(dof::DofManager) block_el_level_sizes[n] = size(conn, 1)^2 end - # convert to NamedTuples so it's easy to index - # block_syms = keys(fspace.ref_fes) - # block_start_indices = NamedTuple{block_syms}(tuple(block_start_indices)...) - # block_el_level_sizes = NamedTuple{block_syms}(tuple(block_el_level_sizes)...) - # setup pre-allocated arrays based on number of entries found above Is = Vector{Int64}(undef, n_entries) Js = Vector{Int64}(undef, n_entries) @@ -142,29 +134,6 @@ function SparseArrays.sparse!(pattern::SparseMatrixPattern, storage) ) end -""" -$(TYPEDSIGNATURES) -Specialization of of ```_assemble_element!``` for ```SparseMatrixAssembler```. -""" -function _assemble_element!( - storage, K_el::SMatrix, - el_id::Int, - block_start_index::Int, block_el_level_size::Int -) - # figure out ids needed to update - start_id = block_start_index + - (el_id - 1) * block_el_level_size - end_id = start_id + block_el_level_size - 1 - ids = start_id:end_id - - # get appropriate storage and update values - # @views storage[ids] += K_el[:] - for (i, id) in enumerate(ids) - storage[id] += K_el.data[i] - end - return nothing -end - num_entries(s::SparseMatrixPattern) = length(s.Is) # NOTE this methods assumes that dof is up to date diff --git a/src/assemblers/Vector.jl b/src/assemblers/Vector.jl index d0914ac..2baf9fd 100644 --- a/src/assemblers/Vector.jl +++ b/src/assemblers/Vector.jl @@ -5,7 +5,8 @@ function assemble_vector!( assembler, func::F, Uu, p ) where F <: Function assemble_vector!( - assembler.residual_storage, assembler.dof, + assembler.residual_storage, + assembler.vector_pattern, assembler.dof, func, Uu, p ) return nothing @@ -15,15 +16,17 @@ end $(TYPEDSIGNATURES) """ function assemble_vector!( - storage, dof, func::F, Uu, p + storage, pattern, dof, func::F, Uu, p ) where F <: Function fill!(storage, zero(eltype(storage))) + backend = KA.get_backend(storage) fspace = function_space(dof) t = current_time(p.times) Δt = time_step(p.times) _update_for_assembly!(p, dof, Uu) + return_type = AssembledVector() for ( - conns, + conns, block_physics, ref_fe, state_old, state_new, props ) in zip( @@ -32,146 +35,19 @@ function assemble_vector!( values(p.state_old), values(p.state_new), values(p.properties) ) - backend = KA.get_backend(p.h1_field) - _assemble_block_vector!( + _assemble_block!( backend, storage, - conns, + conns, 0, 0, # NOTE these are never used for vectors + # TODO eventually we'll need them if we want to use sparse vectors func, block_physics, ref_fe, p.h1_coords, t, Δt, p.h1_field, p.h1_field_old, - state_old, state_new, props + state_old, state_new, props, + return_type ) end return nothing end - -# CPU Implementation - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a CPU implementation -with no threading. - -TODO improve typing of fields to ensure they mathc up in terms of function - spaces -""" -function _assemble_block_vector!( - ::KA.CPU, - field::AbstractField, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - for e in axes(conns, 2) - x_el = _element_level_fields_flat(X, ref_fe, conns, e) - u_el = _element_level_fields_flat(U, ref_fe, conns, e) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, e) - props_el = _element_level_properties(props, e) - R_el = _element_scratch_vector(ref_fe, U) - - for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, e) - state_new_q = _quadrature_level_state(state_new, q, e) - R_q = func(physics, interps, x_el, t, Δt, u_el, u_el_old, state_old_q, state_new_q, props_el) - R_el = R_el + R_q - end - - @views _assemble_element!(field, R_el, conns[:, e]) - end -end - -# GPU implementation - -""" -$(TYPEDSIGNATURES) -Kernel for residual block assembly - -TODO mark const fields -""" -# COV_EXCL_START -KA.@kernel function _assemble_block_vector_kernel!( - field::AbstractField, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - E = KA.@index(Global) - - x_el = _element_level_fields_flat(X, ref_fe, conns, E) - u_el = _element_level_fields_flat(U, ref_fe, conns, E) - u_el_old = _element_level_fields_flat(U_old, ref_fe, conns, E) - props_el = _element_level_properties(props, E) - R_el = _element_scratch_vector(ref_fe, U) - - for q in 1:num_quadrature_points(ref_fe) - interps = _cell_interpolants(ref_fe, q) - state_old_q = _quadrature_level_state(state_old, q, E) - state_new_q = _quadrature_level_state(state_new, q, E) - R_q = func(physics, interps, x_el, t, Δt, u_el, u_el_old, state_old_q, state_new_q, props_el) - R_el = R_el + R_q - end - - # now assemble atomically - n_dofs = size(field, 1) - for i in 1:size(conns, 1) - for d in 1:n_dofs - global_id = n_dofs * (conns[i, E] - 1) + d - local_id = n_dofs * (i - 1) + d - Atomix.@atomic field.data[global_id] += R_el[local_id] - end - end -end -# COV_EXCL_STOP - -""" -$(TYPEDSIGNATURES) -Assembly method for a block labelled as block_id. This is a GPU agnostic implementation -using KernelAbstractions and Atomix for eliminating race conditions - -TODO add state variables and physics properties -""" -function _assemble_block_vector!( - backend::KA.Backend, - field::AbstractField, - conns::Connectivity, - func::Function, - physics::AbstractPhysics, ref_fe::ReferenceFE, - X::AbstractField, t::T, Δt::T, - U::Solution, U_old::Solution, - state_old::S, state_new::S, props::AbstractArray -) where { - T <: Number, - Solution <: AbstractField, - S #<: L2QuadratureField -} - kernel! = _assemble_block_vector_kernel!(backend) - kernel!( - field, - conns, - func, - physics, ref_fe, - X, t, Δt, - U, U_old, - state_old, state_new, props, - ndrange=size(conns, 2) - ) - return nothing -end diff --git a/src/bcs/PeriodicBCs.jl b/src/bcs/PeriodicBCs.jl index 48b7ab4..a30b714 100644 --- a/src/bcs/PeriodicBCs.jl +++ b/src/bcs/PeriodicBCs.jl @@ -171,6 +171,33 @@ function _constraint_matrix(dof::DofManager, pbcs::PeriodicBCs) return sparse(Is, Js, Vs) end +function _constraint_matrix_mask(dof_manager::DofManager, pbc::PeriodicBCs) + Is, Js, Vs = Int[], Int[], Float64[] + side_a_dofs = mapreduce(x -> x.side_a_dofs, vcat, pbc.bc_caches) + side_b_dofs = mapreduce(x -> x.side_b_dofs, vcat, pbc.bc_caches) + # @show length(side_a_dofs) + for dof in 1:length(dof_manager) + if dof in side_b_dofs + id = findfirst(isequal(dof), side_b_dofs) + if id > length(dof_manager) + @assert false + end + # push!(Is, dof) + # push!(Js, side_a_dofs[id]) + # push!(Vs, 1.) + + push!(Is, dof) + push!(Js, side_b_dofs[id]) + push!(Vs, 1.) + else + push!(Is, dof) + push!(Js, dof) + push!(Vs, 1.) + end + end + return sparse(Is, Js, Vs) +end + function _create_constraint_field(dof::DofManager, pbcs::PeriodicBCs) n_constraints = mapreduce(x -> length(x.side_a_dofs), +, pbcs.bc_caches) return zeros(n_constraints) diff --git a/test/poisson/TestPoissonPBCs.jl b/test/poisson/TestPoissonPBCs.jl index 3e10847..4bf6393 100644 --- a/test/poisson/TestPoissonPBCs.jl +++ b/test/poisson/TestPoissonPBCs.jl @@ -46,7 +46,7 @@ function test_poisson_pbcs() for n in 1:10 U = sol[1:nu] λ = sol[nu+1:end] - assemble_vector!(asm.residual_storage, asm.dof, residual, U, p) + assemble_vector!(asm.residual_storage, asm.vector_pattern, asm.dof, residual, U, p) R_u = residual(asm) + C' * λ R_c = C * U R = vcat(R_u, R_c)