Skip to content
Open
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
ConservativeRegridding = "8e50ac2c-eb48-49bc-a402-07c87b949343"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
Expand All @@ -49,7 +50,7 @@ XESMF = "2e0b0046-e7a1-486f-88de-807ee8ffabe5"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"

[extensions]
OceananigansAMDGPUExt = "AMDGPU"
OceananigansAMDGPUExt = ["AMDGPU", "AbstractFFTs"]
OceananigansCUDAExt = ["CUDA", "GPUArrays", "GPUArraysCore"]
OceananigansConservativeRegriddingExt = "ConservativeRegridding"
OceananigansEnzymeExt = "Enzyme"
Expand All @@ -62,6 +63,7 @@ OceananigansXESMFExt = ["XESMF"]

[compat]
AMDGPU = "1.3.6, 2"
AbstractFFTs = "1.5"
Adapt = "4.1.1"
Aqua = "0.8"
CUDA = "=5.8.5, 5.9.1"
Expand Down
14 changes: 8 additions & 6 deletions ext/OceananigansAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
module OceananigansAMDGPUExt

using Oceananigans
using InteractiveUtils
using AMDGPU, AMDGPU.rocSPARSE, AMDGPU.rocFFT
using AMDGPU, AMDGPU.rocSPARSE
using AbstractFFTs: plan_fft!, plan_ifft!

using Oceananigans.Utils: linear_expand, __linear_ndrange, MappedCompilerMetadata
using InteractiveUtils
using KernelAbstractions: __dynamic_checkbounds, __iterspace
using KernelAbstractions
using SparseArrays
Expand All @@ -15,10 +17,10 @@ import Oceananigans.Fields as FD
import Oceananigans.Grids as GD
import Oceananigans.Solvers as SO
import Oceananigans.Utils as UT
import SparseArrays: SparseMatrixCSC
import Oceananigans.DistributedComputations: Distributed
import KernelAbstractions: __iterspace, __groupindex, __dynamic_checkbounds,
__validindex, CompilerMetadata
import Oceananigans.DistributedComputations: Distributed
import SparseArrays: SparseMatrixCSC

const GPUVar = Union{ROCArray, Ptr}

Expand Down Expand Up @@ -89,15 +91,15 @@ BC.validate_boundary_condition_architecture(::ROCArray, ::AC.CPU, bc, side) =

"""
    SO.plan_forward_transform(A::ROCArray, topology, dims, planner_flag)

Return a forward FFT plan for the `ROCArray` `A` along `dims`, built with
`plan_fft!`, or `nothing` when `dims` is empty.

The method applies to `GD.Bounded` and `GD.Periodic` topologies. `planner_flag`
is accepted but not used by this method.
"""
function SO.plan_forward_transform(A::ROCArray, ::Union{GD.Bounded, GD.Periodic}, dims, planner_flag)
    # Nothing to transform when no dimensions are requested.
    length(dims) == 0 && return nothing

    # Use the generic `plan_fft!` brought into scope from AbstractFFTs rather than
    # reaching into `AMDGPU.rocFFT`; the earlier `return AMDGPU.rocFFT.plan_fft!(...)`
    # made this call unreachable.
    return plan_fft!(A, dims)
end

# Setting a field from a ROCArray forwards to the array-based setter.
function FD.set!(v::Field, a::ROCArray)
    return FD.set_to_array!(v, a)
end

# Same forwarding for distributed fields.
function DC.set!(v::DC.DistributedField, a::ROCArray)
    return DC.set_to_array!(v, a)
end

"""
    SO.plan_backward_transform(A::ROCArray, topology, dims, planner_flag)

Return an inverse FFT plan for the `ROCArray` `A` along `dims`, built with
`plan_ifft!`, or `nothing` when `dims` is empty.

The method applies to `GD.Bounded` and `GD.Periodic` topologies. `planner_flag`
is accepted but not used by this method.
"""
function SO.plan_backward_transform(A::ROCArray, ::Union{GD.Bounded, GD.Periodic}, dims, planner_flag)
    # Nothing to transform when no dimensions are requested.
    length(dims) == 0 && return nothing

    # Use the generic `plan_ifft!` brought into scope from AbstractFFTs rather than
    # reaching into `AMDGPU.rocFFT`; the earlier `return AMDGPU.rocFFT.plan_ifft!(...)`
    # made this call unreachable.
    return plan_ifft!(A, dims)
end

AMDGPU.Device.@device_override @inline function __validindex(ctx::MappedCompilerMetadata)
Expand Down
111 changes: 95 additions & 16 deletions test/test_amdgpu.jl
Original file line number Diff line number Diff line change
@@ -1,37 +1,116 @@
include("dependencies_for_runtests.jl")

using AMDGPU
using SeawaterPolynomials.TEOS10: TEOS10EquationOfState

@testset "AMDGPU extension" begin
"""
    build_and_timestep_simulation(model)

Test that every velocity and tracer field of `model` is backed by a `ROCArray`,
then run a `Simulation` of `model` for three iterations with `Δt = 1minute` and
test the resulting iteration count and simulation time. Returns `nothing`.
"""
function build_and_timestep_simulation(model)
    float_type = eltype(model)

    # Velocities and tracers are checked together.
    prognostic_fields = merge(model.velocities, model.tracers)
    for fld in prognostic_fields
        @test parent(fld) isa ROCArray
    end

    sim = Simulation(model; Δt = 1minute, stop_iteration = 3, verbose = false)
    run!(sim)

    @test iteration(sim) == 3

    # The expected time is converted to the model's float type before the
    # approximate comparison.
    @test time(sim) ≈ float_type(3minutes)

    return nothing
end

# Builds regular and stretched RectilinearGrids on the AMDGPU architecture and
# time-steps HydrostaticFreeSurfaceModel and NonhydrostaticModel on them for
# every float type in `float_types`.
# NOTE(review): `float_types` is not defined in this file; presumably it comes
# from dependencies_for_runtests.jl — confirm.
@testset "AMDGPU on RectilinearGrids" begin
roc = AMDGPU.ROCBackend()
arch = GPU(roc)

for FT in float_types
@info "Testing grids on $arch with $FT..."

# One uniformly spaced grid, one with x given as an array of faces, and one
# with z given as an array of faces.
regular_grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=(0, 4), y=(0, 1), z=(0, 16))
horizontally_stretched_grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=[0, 1, 2, 3, 4], y=(0, 1), z=(0, 16))
vertically_stretched_grid = RectilinearGrid(arch, FT, size=(16, 8, 4), x=(0, 16), y=(0, 1), z=[0, 1, 2, 3, 4])

# The array-valued x coordinates must be stored as ROCArrays.
@test parent(horizontally_stretched_grid.xᶠᵃᵃ) isa ROCArray
@test parent(horizontally_stretched_grid.xᶜᵃᵃ) isa ROCArray

# Likewise for the array-valued z coordinates and spacings.
@test parent(vertically_stretched_grid.z.cᵃᵃᶠ) isa ROCArray
@test parent(vertically_stretched_grid.z.cᵃᵃᶜ) isa ROCArray
@test parent(vertically_stretched_grid.z.Δᵃᵃᶠ) isa ROCArray
@test parent(vertically_stretched_grid.z.Δᵃᵃᶜ) isa ROCArray

for grid in (regular_grid, horizontally_stretched_grid, vertically_stretched_grid)
@test eltype(grid) == FT
@test architecture(grid) isa GPU
end

@info "Testing HydrostaticFreeSurfaceModel on $arch with $FT..."

# Model components shared by the hydrostatic and nonhydrostatic tests below.
coriolis = FPlane(latitude=45)
buoyancy = BuoyancyTracer()
tracers = :b
advection = WENO(order=5)

for grid in (regular_grid, horizontally_stretched_grid, vertically_stretched_grid)
# Local names must match the keyword names used in the shorthand call below.
momentum_advection = tracer_advection = advection

free_surface = SplitExplicitFreeSurface(grid; substeps=60)

model = HydrostaticFreeSurfaceModel(grid; free_surface,
coriolis, buoyancy, tracers,
momentum_advection, tracer_advection)

build_and_timestep_simulation(model)
end

@info "Testing NonhydrostaticModel on $arch with $FT..."

# horizontally_stretched_grid is not part of this loop.
# NOTE(review): presumably because one of the pressure solvers below does not
# support it — confirm.
for grid in (regular_grid, vertically_stretched_grid)
cg_kw = (maxiter=10, reltol=1e-7, abstol=1e-7, preconditioner=nothing)
pressure_solvers = (Oceananigans.Solvers.ConjugateGradientPoissonSolver(grid; cg_kw...),
Oceananigans.Solvers.FFTBasedPoissonSolver(grid))

# Each grid is time-stepped once per pressure solver.
for pressure_solver in pressure_solvers
model = NonhydrostaticModel(grid; pressure_solver,
coriolis, buoyancy,
tracers, advection)

build_and_timestep_simulation(model)
end
end
end
end

@testset "AMDGPU on LatitudeLongitudeGrid with HydrostaticFreeSurfaceModel" begin
roc = AMDGPU.ROCBackend()
arch = GPU(roc)

for FT in float_types
@info " Testing on $arch with $FT"

grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=[0, 1, 2, 3, 4], y=(0, 1), z=(0, 16))
grid = LatitudeLongitudeGrid(arch, FT, size=(4, 8, 16), longitude=(-60, 60), latitude=(0, 60), z=(0, 1))

@test parent(grid.xᶠᵃᵃ) isa ROCArray
@test parent(grid.xᶜᵃᵃ) isa ROCArray
@test parent(grid.Δxᶜᶜᵃ) isa ROCArray
@test parent(grid.Δxᶠᶜᵃ) isa ROCArray
@test parent(grid.Δxᶜᶠᵃ) isa ROCArray
@test parent(grid.Δxᶠᶠᵃ) isa ROCArray
@test parent(grid.Azᶜᶜᵃ) isa ROCArray
@test parent(grid.Azᶠᶜᵃ) isa ROCArray
@test parent(grid.Azᶜᶠᵃ) isa ROCArray
@test parent(grid.Azᶠᶠᵃ) isa ROCArray
@test eltype(grid) == FT
@test architecture(grid) isa GPU

model = HydrostaticFreeSurfaceModel(grid;
equation_of_state = TEOS10EquationOfState()
buoyancy = SeawaterBuoyancy(; equation_of_state)

model = HydrostaticFreeSurfaceModel(grid; buoyancy,
coriolis = FPlane(latitude=45),
buoyancy = BuoyancyTracer(),
tracers = :b,
tracers = (:T, :S),
momentum_advection = WENO(order=5),
tracer_advection = WENO(order=5),
free_surface = SplitExplicitFreeSurface(grid; substeps=60))

for field in merge(model.velocities, model.tracers)
@test parent(field) isa ROCArray
end

simulation = Simulation(model, Δt=1minute, stop_iteration=3)
run!(simulation)

@test iteration(simulation) == 3
@test time(simulation) == 3minutes
# Call the shared helper `build_and_timestep_simulation` defined earlier in this
# test file; the previous spelling `build_and_time_step_simulation` is not defined.
build_and_timestep_simulation(model)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@glwagner you added this line in f835c0d, but this function doesn't exist anywhere as far as I can tell.

end
end