Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
573a366
add SplitExplicit tests for checkpointer
navidcy Mar 20, 2025
41cb115
add SplitExplicit tests for checkpointer
navidcy Mar 20, 2025
0a52d4e
expose some checkpointer functionality; don't store properties as a C…
navidcy Mar 20, 2025
a776730
use julia v1.10.9
navidcy Mar 20, 2025
b66ae1b
pass properties to write_output!
navidcy Mar 20, 2025
be22062
bump patch release
navidcy Mar 20, 2025
0b67377
validate_properties -> validate_checkpointed_properties
navidcy Mar 20, 2025
fe8e70a
add default properties kwarg to write_output!
navidcy Mar 20, 2025
83f4ea2
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Mar 22, 2025
cbf323d
remove stray spaces and add backticks
navidcy Mar 25, 2025
40dccba
update docstring
navidcy Mar 25, 2025
38ef5b6
code alignment
navidcy Mar 25, 2025
e526910
merge main
navidcy Mar 25, 2025
43a644f
Merge branch 'ncc/checkopointer-shenanigans-2' of github.com:CliMA/Oc…
navidcy Mar 25, 2025
3aeeed7
set_clock! + clock.last_stage_Δt, clock.last_Δt in tick!(clock, Δt)
navidcy Mar 25, 2025
c0eb07b
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Mar 25, 2025
c3c007f
add set_clock! for OceananigansModels
navidcy Mar 25, 2025
a21ff8a
clock.last_Δt = Δt is part of tick!(clock, Δt)
navidcy Mar 25, 2025
3329188
add set_clock!(::Simulation, clock)
navidcy Mar 25, 2025
2f6ef7d
add docs for align_time_step
navidcy Mar 25, 2025
96286b1
wip
navidcy Apr 24, 2025
2686f8c
merge main
navidcy Apr 24, 2025
d5420c7
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy May 23, 2025
7607706
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jun 26, 2025
847c953
Update Project.toml
navidcy Jun 26, 2025
635550b
Update src/OutputWriters/checkpointer.jl
navidcy Jun 26, 2025
0ca81dd
Apply suggestions from code review
navidcy Jun 26, 2025
666dfcc
Update checkpointer.jl
navidcy Jun 26, 2025
97e3ede
Update simulation.jl
navidcy Jun 26, 2025
0992038
Update clock.jl
navidcy Jun 26, 2025
a9e6657
Update clock.jl
navidcy Jun 26, 2025
e4e07e0
clock from main
navidcy Jun 26, 2025
3da6865
updates in clock
navidcy Jun 26, 2025
b91aa9b
clock from main
navidcy Jun 26, 2025
0ac4427
updates in clock
navidcy Jun 26, 2025
cb01cf4
updates in clock
navidcy Jun 26, 2025
2ac952f
clock from main
navidcy Jun 26, 2025
a673011
Update clock.jl
navidcy Jun 26, 2025
28f6e39
import AbstractModel
navidcy Jun 27, 2025
9cface9
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 6, 2025
023df35
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 9, 2025
db85355
Update runge_kutta_3.jl
navidcy Jul 9, 2025
8c9f32d
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 9, 2025
ac6d76f
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 10, 2025
4319b16
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 16, 2025
05dc781
Merge branch 'main' into ncc/checkopointer-shenanigans-2
navidcy Jul 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
env:
JULIA_VERSION: "1.10.8"
JULIA_VERSION: "1.10.9"
JULIA_MINOR_VERSION: "1.10"
TARTARUS_HOME: "/storage5/buildkite-agent"
JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
Expand Down Expand Up @@ -209,7 +209,7 @@ steps:
architecture: CPU
retry:
automatic:
- exit_status: 1
- exit_status: 1
limit: 1
depends_on: "init"

Expand All @@ -224,7 +224,7 @@ steps:
architecture: CPU
retry:
automatic:
- exit_status: 1
- exit_status: 1
limit: 1
depends_on: "init"

Expand Down Expand Up @@ -337,7 +337,7 @@ steps:
##### Turbulence Closures
#####


- label: "🎣 gpu turbulence closures"
env:
JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER"
Expand Down Expand Up @@ -683,7 +683,7 @@ steps:
architecture: CPU
retry:
automatic:
- exit_status: 1
- exit_status: 1
limit: 1
depends_on: "init"

Expand All @@ -699,7 +699,7 @@ steps:
architecture: CPU
retry:
automatic:
- exit_status: 1
- exit_status: 1
limit: 1
depends_on: "init"

Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Oceananigans"
uuid = "9e8cae18-63c1-5223-a75c-80ca9d6e9a09"
authors = ["Climate Modeling Alliance and contributors"]
version = "0.96.2"
version = "0.96.3"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
18 changes: 10 additions & 8 deletions src/Models/Models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ using Oceananigans.Utils: Time

import Oceananigans: initialize!
import Oceananigans.Architectures: architecture
import Oceananigans.TimeSteppers: reset!
import Oceananigans.TimeSteppers: reset!, set_clock!
import Oceananigans.Solvers: iteration

# A prototype interface for AbstractModel.
Expand Down Expand Up @@ -116,6 +116,8 @@ const OceananigansModels = Union{HydrostaticFreeSurfaceModel,
NonhydrostaticModel,
ShallowWaterModel}

"""
    set_clock!(model::OceananigansModels, new_clock)

Forward to `set_clock!(model.clock, new_clock)`, i.e. apply `new_clock` to the clock
held in `model.clock`.
"""
function set_clock!(model::OceananigansModels, new_clock)
    return set_clock!(model.clock, new_clock)
end

"""
possible_field_time_series(model::HydrostaticFreeSurfaceModel)

Expand All @@ -128,10 +130,10 @@ function possible_field_time_series(model::OceananigansModels)
# such as model.diffusivity_fields
return tuple(model_fields, forcing)
end
# Update _all_ `FieldTimeSeries`es in an `OceananigansModel`.

# Update _all_ `FieldTimeSeries`es in an `OceananigansModel`.
# Extract `FieldTimeSeries` from all property names that might contain a `FieldTimeSeries`
# Flatten the resulting tuple by extracting unique values and set! them to the
# Flatten the resulting tuple by extracting unique values and set! them to the
# correct time range by looping over them
function update_model_field_time_series!(model::OceananigansModels, clock::Clock)
time = Time(clock.time)
Expand All @@ -146,7 +148,7 @@ function update_model_field_time_series!(model::OceananigansModels, clock::Clock

return nothing
end

import Oceananigans.TimeSteppers: reset!

function reset!(model::OceananigansModels)
Expand All @@ -162,15 +164,15 @@ function reset!(model::OceananigansModels)
for field in model.timestepper.Gⁿ
fill!(field, 0)
end

return nothing
end

# Check for NaNs in the first prognostic field (generalizes to prescribed velocities).
function default_nan_checker(model::OceananigansModels)
model_fields = prognostic_fields(model)

if isempty(model_fields)
if isempty(model_fields)
return nothing
end

Expand All @@ -182,7 +184,7 @@ end

using Oceananigans.Models.HydrostaticFreeSurfaceModels: OnlyParticleTrackingModel

# Particle tracking models with prescribed velocities (and no tracers)
# Particle tracking models with prescribed velocities (and no tracers)
# have no prognostic fields and no chance of producing a NaN.
function default_nan_checker(::OnlyParticleTrackingModel)
    return nothing
end

Expand Down
116 changes: 64 additions & 52 deletions src/OutputWriters/checkpointer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,60 @@ using Oceananigans: fields, prognostic_fields
using Oceananigans.Fields: offset_data
using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper

import Oceananigans.Fields: set!
import Oceananigans.Fields: set!

mutable struct Checkpointer{T, P} <: AbstractOutputWriter
mutable struct Checkpointer{T} <: AbstractOutputWriter
schedule :: T
dir :: String
prefix :: String
properties :: P
overwrite_existing :: Bool
verbose :: Bool
cleanup :: Bool
end

function default_checkpointed_properties(model)
properties = [:grid, :particles, :clock, :timestepper]
#if has_ab2_timestepper(model)
# push!(properties, :timestepper)
#end
return properties
end
"""
    default_checkpointed_properties(model)

Return a new vector with the property names checkpointed by default:
`:grid`, `:particles`, `:clock`, and `:timestepper`. The `model` argument is not
inspected by this fallback.
"""
function default_checkpointed_properties(model)
    return Symbol[:grid, :particles, :clock, :timestepper]
end
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add the timestepper only if we use AB2; in the end, RK3 does not require it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the fallback be nothing, and each model add defaults that are specific to the model? The models have to figure this out.


"""
    has_ab2_timestepper(model)

Return `true` if `model.timestepper` is a `QuasiAdamsBashforth2TimeStepper`.
Return `false` otherwise, including when evaluating the check throws
(for example because `model` has no `timestepper` property).
"""
function has_ab2_timestepper(model)
    try
        return model.timestepper isa QuasiAdamsBashforth2TimeStepper
    catch
        # Any failure while inspecting the model is treated as "not AB2".
        return false
    end
end

"""
    required_checkpointed_properties(model)

Return the property names that are required for `set!` to pick up from a checkpoint:
always `:grid`, `:particles`, and `:clock`, plus `:timestepper` when
`has_ab2_timestepper(model)` is `true`.
"""
function required_checkpointed_properties(model)
    always_required = [:grid, :particles, :clock]

    return has_ab2_timestepper(model) ? vcat(always_required, :timestepper) : always_required
end

"""
    validate_checkpointed_properties(model, properties)

Validate the list of `properties` to be checkpointed for `model`, and return it.

`properties` is modified in place:

  * every entry of `required_checkpointed_properties(model)` that is missing is appended
    (with a warning);
  * every non-required entry for which `has_reference(Function, getproperty(model, p))`
    is `true` is removed (with a warning).

An error is thrown if an entry is not a `Symbol` or is not in `propertynames(model)`.
"""
function validate_checkpointed_properties(model, properties)
    required_properties = required_checkpointed_properties(model)

    for rp in required_properties
        if rp ∉ properties
            @warn "$rp is required for checkpointing. It will be added to checkpointed properties"
            push!(properties, rp)
        end
    end

    # Iterate over a snapshot: `filter!` below shrinks `properties`, and removing
    # elements from a vector while iterating over it skips the element that follows
    # each removal, so that element would never be validated.
    for p in copy(properties)
        p isa Symbol || error("Property $p to be checkpointed must be a Symbol.")
        p ∉ propertynames(model) && error("Cannot checkpoint $p, it is not a model property!")

        if (p ∉ required_properties) && has_reference(Function, getproperty(model, p))
            @warn "model.$p contains a function somewhere in its hierarchy and will not be checkpointed."
            filter!(e -> e != p, properties)
        end
    end

    return properties
end


"""
Checkpointer(model;
schedule,
Expand Down Expand Up @@ -79,36 +107,11 @@ function Checkpointer(model; schedule,
prefix = "checkpoint",
overwrite_existing = false,
verbose = false,
cleanup = false,
properties = default_checkpointed_properties(model))

# Certain properties are required for `set!` to pickup from a checkpoint.
required_properties = [:grid, :particles, :clock]

if has_ab2_timestepper(model)
push!(required_properties, :timestepper)
end

for rp in required_properties
if rp ∉ properties
@warn "$rp is required for checkpointing. It will be added to checkpointed properties"
push!(properties, rp)
end
end

for p in properties
p isa Symbol || error("Property $p to be checkpointed must be a Symbol.")
p ∉ propertynames(model) && error("Cannot checkpoint $p, it is not a model property!")

if (p ∉ required_properties) && has_reference(Function, getproperty(model, p))
@warn "model.$p contains a function somewhere in its hierarchy and will not be checkpointed."
filter!(e -> e != p, properties)
end
end
cleanup = false)

mkpath(dir)

return Checkpointer(schedule, dir, prefix, properties, overwrite_existing, verbose, cleanup)
return Checkpointer(schedule, dir, prefix, overwrite_existing, verbose, cleanup)
end

#####
Expand Down Expand Up @@ -177,29 +180,40 @@ end
function write_output!(c::Checkpointer, model)
filepath = checkpoint_path(model.clock.iteration, c)
c.verbose && @info "Checkpointing to file $filepath..."
addr = checkpointer_address(model)

t1 = time_ns()

jldopen(filepath, "w") do file
file["$addr/checkpointed_properties"] = c.properties
serializeproperties!(file, model, c.properties, addr)
model_fields = prognostic_fields(model)
field_names = keys(model_fields)
for name in field_names
full_address = "$addr/$name"
serializeproperty!(file, full_address, model_fields[name])
end
end
write_output!(c, model, filepath, "w")

t2, sz = time_ns(), filesize(filepath)

c.verbose && @info "Checkpointing done: time=$(prettytime((t2 - t1) * 1e-9)), size=$(pretty_filesize(sz))"

c.cleanup && cleanup_checkpoints(c)

return nothing
end

"""
    write_output!(c, model, filepath::AbstractString, mode::AbstractString;
                  properties = default_checkpointed_properties(model))

Write a checkpoint of `model` to the file at `filepath`, opened via `jldopen` with `mode`.

`properties` is first passed through `validate_checkpointed_properties`. The validated
list is stored under `"\$addr/checkpointed_properties"`, where `addr` is
`checkpointer_address(model)`. The properties are then serialized with
`serializeproperties!`, followed by each field of `prognostic_fields(model)`, which is
written under `"\$addr/\$name"`.

The first argument `c` is not used in the body. Returns `nothing`.
"""
function write_output!(c, model, filepath::AbstractString, mode::AbstractString;
                       properties = default_checkpointed_properties(model))

    validated_properties = validate_checkpointed_properties(model, properties)
    addr = checkpointer_address(model)

    jldopen(filepath, mode) do file
        # Record which properties were checkpointed, then serialize them.
        file["$addr/checkpointed_properties"] = validated_properties
        serializeproperties!(file, model, validated_properties, addr)

        model_fields = prognostic_fields(model)
        for name in keys(model_fields)
            serializeproperty!(file, "$addr/$name", model_fields[name])
        end
    end

    return nothing
end

function cleanup_checkpoints(checkpointer)
filepaths = glob(checkpoint_superprefix(checkpointer.prefix) * "*.jld2", checkpointer.dir)
latest_checkpoint_filepath = latest_checkpoint(checkpointer, filepaths)
Expand All @@ -212,13 +226,12 @@ end
#####

"""
set!(model, filepath::AbstractString)
set!(model::AbstractModel, filepath::AbstractString)

Set data in `model.velocities`, `model.tracers`, `model.timestepper.Gⁿ`, and
`model.timestepper.G⁻` to checkpointed data stored at `filepath`.
"""
function set!(model::AbstractModel, filepath::AbstractString)

addr = checkpointer_address(model)

jldopen(filepath, "r") do file
Expand Down Expand Up @@ -280,9 +293,8 @@ function set_time_stepper_tendencies!(timestepper, file, model_fields, addr)
return nothing
end

# For self-starting timesteppers like RK3 we do nothing
# For self-starting timesteppers like RK3 we do nothing
function set_time_stepper!(timestepper, args...)
    return nothing
end

# For `QuasiAdamsBashforth2TimeStepper`, delegate to `set_time_stepper_tendencies!`
# with the same arguments.
function set_time_stepper!(timestepper::QuasiAdamsBashforth2TimeStepper, args...)
    return set_time_stepper_tendencies!(timestepper, args...)
end

Loading
Loading