diff --git a/Project.toml b/Project.toml index a2b3539b33..edbd8c2a6f 100755 --- a/Project.toml +++ b/Project.toml @@ -35,6 +35,7 @@ StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" [weakdeps] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" @@ -47,7 +48,7 @@ XESMF = "2e0b0046-e7a1-486f-88de-807ee8ffabe5" oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" [extensions] -OceananigansAMDGPUExt = "AMDGPU" +OceananigansAMDGPUExt = ["AMDGPU", "AbstractFFTs"] OceananigansCUDAExt = ["CUDA", "GPUArrays", "GPUArraysCore"] OceananigansEnzymeExt = "Enzyme" OceananigansMakieExt = "Makie" @@ -59,6 +60,7 @@ OceananigansXESMFExt = ["XESMF"] [compat] AMDGPU = "1.3.6, 2" +AbstractFFTs = "1.5" Adapt = "4.1.1" Aqua = "0.8" CUDA = "=5.8.5, 5.9.1" diff --git a/ext/OceananigansAMDGPUExt.jl b/ext/OceananigansAMDGPUExt.jl index 11bd30c536..473e53ed38 100644 --- a/ext/OceananigansAMDGPUExt.jl +++ b/ext/OceananigansAMDGPUExt.jl @@ -1,9 +1,11 @@ module OceananigansAMDGPUExt using Oceananigans -using InteractiveUtils -using AMDGPU, AMDGPU.rocSPARSE, AMDGPU.rocFFT +using AMDGPU, AMDGPU.rocSPARSE +using AbstractFFTs: plan_fft!, plan_ifft! 
+ using Oceananigans.Utils: linear_expand, __linear_ndrange, MappedCompilerMetadata +using InteractiveUtils using KernelAbstractions: __dynamic_checkbounds, __iterspace using KernelAbstractions using SparseArrays @@ -15,10 +17,10 @@ import Oceananigans.Fields as FD import Oceananigans.Grids as GD import Oceananigans.Solvers as SO import Oceananigans.Utils as UT -import SparseArrays: SparseMatrixCSC +import Oceananigans.DistributedComputations: Distributed import KernelAbstractions: __iterspace, __groupindex, __dynamic_checkbounds, __validindex, CompilerMetadata -import Oceananigans.DistributedComputations: Distributed +import SparseArrays: SparseMatrixCSC const GPUVar = Union{ROCArray, Ptr} @@ -89,7 +91,7 @@ BC.validate_boundary_condition_architecture(::ROCArray, ::AC.CPU, bc, side) = function SO.plan_forward_transform(A::ROCArray, ::Union{GD.Bounded, GD.Periodic}, dims, planner_flag) length(dims) == 0 && return nothing - return AMDGPU.rocFFT.plan_fft!(A, dims) + return plan_fft!(A, dims) end FD.set!(v::Field, a::ROCArray) = FD.set_to_array!(v, a) @@ -97,7 +99,7 @@ DC.set!(v::DC.DistributedField, a::ROCArray) = DC.set_to_array!(v, a) function SO.plan_backward_transform(A::ROCArray, ::Union{GD.Bounded, GD.Periodic}, dims, planner_flag) length(dims) == 0 && return nothing - return AMDGPU.rocFFT.plan_ifft!(A, dims) + return plan_ifft!(A, dims) end AMDGPU.Device.@device_override @inline function __validindex(ctx::MappedCompilerMetadata) diff --git a/test/test_amdgpu.jl b/test/test_amdgpu.jl index 7461d2bf47..7a26b3b61a 100644 --- a/test/test_amdgpu.jl +++ b/test/test_amdgpu.jl @@ -1,37 +1,115 @@ include("dependencies_for_runtests.jl") using AMDGPU +using SeawaterPolynomials.TEOS10: TEOS10EquationOfState -@testset "AMDGPU extension" begin +function build_and_timestep_simulation(model) + FT = eltype(model) + + for field in merge(model.velocities, model.tracers) + @test parent(field) isa ROCArray + end + + simulation = Simulation(model, Δt=1minute, stop_iteration=3, 
verbose=false) + run!(simulation) + + @test iteration(simulation) == 3 + @test time(simulation) ≈ FT(3minutes) + + return nothing +end + +@testset "AMDGPU on RectilinearGrids" begin + roc = AMDGPU.ROCBackend() + arch = GPU(roc) + + for FT in float_types + @info "Testing grids on $arch with $FT..." + + regular_grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=(0, 4), y=(0, 1), z=(0, 16)) + horizontally_stretched_grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=[0, 1, 2, 3, 4], y=(0, 1), z=(0, 16)) + vertically_stretched_grid = RectilinearGrid(arch, FT, size=(16, 8, 4), x=(0, 16), y=(0, 1), z=[0, 1, 2, 3, 4]) + + @test parent(horizontally_stretched_grid.xᶠᵃᵃ) isa ROCArray + @test parent(horizontally_stretched_grid.xᶜᵃᵃ) isa ROCArray + + @test parent(vertically_stretched_grid.z.cᵃᵃᶠ) isa ROCArray + @test parent(vertically_stretched_grid.z.cᵃᵃᶜ) isa ROCArray + @test parent(vertically_stretched_grid.z.Δᵃᵃᶠ) isa ROCArray + @test parent(vertically_stretched_grid.z.Δᵃᵃᶜ) isa ROCArray + + for grid in (regular_grid, horizontally_stretched_grid, vertically_stretched_grid) + @test eltype(grid) == FT + @test architecture(grid) isa GPU + end + + @info "Testing HydrostaticFreeSurfaceModel on $arch with $FT..." + + coriolis = FPlane(latitude=45) + buoyancy = BuoyancyTracer() + tracers = :b + advection = WENO(order=5) + + for grid in (regular_grid, horizontally_stretched_grid, vertically_stretched_grid) + momentum_advection = tracer_advection = advection + + free_surface = SplitExplicitFreeSurface(grid; substeps=60) + + model = HydrostaticFreeSurfaceModel(grid; free_surface, + coriolis, buoyancy, tracers, + momentum_advection, tracer_advection) + + build_and_timestep_simulation(model) + end + + @info "Testing NonhydrostaticModel on $arch with $FT..." 
+ + for grid in (regular_grid, vertically_stretched_grid) + cg_kw = (maxiter=10, reltol=1e-7, abstol=1e-7, preconditioner=nothing) + pressure_solvers = (Oceananigans.Solvers.ConjugateGradientPoissonSolver(grid; cg_kw...), + Oceananigans.Solvers.FFTBasedPoissonSolver(grid)) + + for pressure_solver in pressure_solvers + model = NonhydrostaticModel(grid; pressure_solver, + coriolis, buoyancy, + tracers, advection) + + build_and_timestep_simulation(model) + end + end + end +end + +@testset "AMDGPU on LatitudeLongitudeGrid with HydrostaticFreeSurfaceModel" begin roc = AMDGPU.ROCBackend() arch = GPU(roc) for FT in float_types @info " Testing on $arch with $FT" - grid = RectilinearGrid(arch, FT, size=(4, 8, 16), x=[0, 1, 2, 3, 4], y=(0, 1), z=(0, 16)) + grid = LatitudeLongitudeGrid(arch, FT, size=(4, 8, 16), longitude=(-60, 60), latitude=(0, 60), z=(0, 1)) - @test parent(grid.xᶠᵃᵃ) isa ROCArray - @test parent(grid.xᶜᵃᵃ) isa ROCArray + @test parent(grid.Δxᶜᶜᵃ) isa ROCArray + @test parent(grid.Δxᶠᶜᵃ) isa ROCArray + @test parent(grid.Δxᶜᶠᵃ) isa ROCArray + @test parent(grid.Δxᶠᶠᵃ) isa ROCArray + @test parent(grid.Azᶜᶜᵃ) isa ROCArray + @test parent(grid.Azᶠᶜᵃ) isa ROCArray + @test parent(grid.Azᶜᶠᵃ) isa ROCArray + @test parent(grid.Azᶠᶠᵃ) isa ROCArray @test eltype(grid) == FT @test architecture(grid) isa GPU - model = HydrostaticFreeSurfaceModel(grid; + equation_of_state = TEOS10EquationOfState() + buoyancy = SeawaterBuoyancy(; equation_of_state) + + model = HydrostaticFreeSurfaceModel(grid; buoyancy, coriolis = FPlane(latitude=45), - buoyancy = BuoyancyTracer(), - tracers = :b, + tracers = (:T, :S), momentum_advection = WENO(order=5), tracer_advection = WENO(order=5), free_surface = SplitExplicitFreeSurface(grid; substeps=60)) - for field in merge(model.velocities, model.tracers) - @test parent(field) isa ROCArray - end - - simulation = Simulation(model, Δt=1minute, stop_iteration=3) - run!(simulation) - - @test iteration(simulation) == 3 - @test time(simulation) == 3minutes 
+ build_and_timestep_simulation(model) end end