Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Variable as a disk array #42

Merged
merged 5 commits into from
Mar 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/variables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ abstract type AbstractGRIBVariable{T, N} <: AbstractVariable{T, N} end

"""
DiskValues{T, N, M} <: DA.AbstractDiskArray{T, N}

Object that maps the dimensions lookup to GRIB messages offsets.
`message_dims` are the dimensions that are found in the GRIB message (namely longitudes and latitudes).
`other_dims` are the dimensions that have been inferred from reading the GRIB file index.
Expand Down Expand Up @@ -90,7 +91,7 @@ function DA.readblock!(A::DiskValues, aout, i::AbstractUnitRange...)
end

# A `DiskValues` array is exposed as a single chunk spanning the whole array:
# the chunk size handed to `GridChunks` is `size(A)` itself.
DA.eachchunk(A::DiskValues) = DA.GridChunks(A, size(A))
# Report the array as chunked, in line with `eachchunk` above: it's basically
# one large chunk. The earlier `DA.Unchunked()` definition of this very same
# method was overwritten by this one, so only a single definition is kept.
DA.haschunks(A::DiskValues) = DA.Chunked()

"""
Variable <: AbstractArray
Expand All @@ -105,7 +106,6 @@ struct Variable{T, N, TA <: Union{Array{T, N}, DA.AbstractDiskArray{T, N}}, TP}
end
# The wrapped storage, as held in the `values` field.
function Base.parent(var::Variable)
    return var.values
end

# The size is computed from the variable's dimensions (`dims` field).
function Base.size(var::Variable)
    return _size_dims(var.dims)
end

# Indexing is forwarded, with the indices untouched, to the parent storage.
function Base.getindex(var::Variable, I...)
    return getindex(parent(var), I...)
end

# The number of dimensions is read directly from the type parameter `N`.
function ndims(::AbstractGRIBVariable{T,N}) where {T,N}
    return N
end

# Name of the variable, as stored in its `name` field.
function varname(var::Variable)
    return var.name
end
Expand All @@ -125,6 +125,19 @@ dataset(var::AbstractGRIBVariable) = var.ds

# Look up the dimension `key` for `var` by delegating to the `_get_dim` method
# that operates on the variable's `dims` field.
function _get_dim(var::Variable, key::String)
    return _get_dim(var.dims, key)
end

DA.@implement_diskarray Variable

# Avoid DiskArrays.jl indexing when the parent is an Array: both methods below
# hand the indices straight to the parent.
# NOTE(review): the `AbstractUnitRange` method presumably exists so that this
# shortcut stays more specific than the methods generated by the macro above —
# confirm.
function Base.getindex(
    var::Variable{T,N,Array{T,N}}, I::AbstractUnitRange...
) where {T,N}
    return getindex(parent(var), I...)
end

function Base.getindex(var::Variable{T,N,Array{T,N}}, I...) where {T,N}
    return getindex(parent(var), I...)
end

# Reading a block of a `Variable` is delegated to its parent storage; the
# output buffer and the index ranges are passed through unchanged.
DA.readblock!(A::Variable, aout, i::AbstractUnitRange...) =
    DA.readblock!(parent(A), aout, i...)
# Chunk layout of a `Variable` is whatever its parent storage reports.
function DA.eachchunk(A::Variable)
    return DA.eachchunk(parent(A))
end

# Likewise, the chunking trait is taken from the parent storage.
function DA.haschunks(A::Variable)
    return DA.haschunks(parent(A))
end

function Variable(ds::GRIBDataset, key)
dsdims = ds.dims
if key in dsdims
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
GRIB = "b16dfd50-4035-11e9-28d4-9dfe17e6779b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

Expand Down
24 changes: 22 additions & 2 deletions test/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@ using GRIBDatasets: DATA_ATTRIBUTES_KEYS, GRID_TYPE_MAP
using GRIBDatasets: _to_datetime
using GRIBDatasets: DiskValues, Variable, CFVariable, cfvariable
using GRIBDatasets: CDM
using DiskArrays

grib_path = joinpath(dir_testfiles, "era5-levels-members.grib")
varstring = "z"

@testset "dataset and variables" begin
grib_path = joinpath(dir_testfiles, "era5-levels-members.grib")
ds = GRIBDataset(grib_path)
dsmis = GRIBDataset(joinpath(dir_testfiles, "fields_with_missing_values.grib"))
dsNaN = GRIBDataset(joinpath(dir_testfiles, "fields_with_missing_values.grib"),maskingvalue = NaN)
index = ds.index

varstring = "z"
@testset "CommonDataModel implementation" begin
@test CDM.dim(ds, "number") == 10
@test length(CDM.dimnames(ds)) == 5
Expand Down Expand Up @@ -196,3 +198,21 @@ end
end

end

@testset "diskarrays" begin
    # No scalar indexing allowed
    # NOTE(review): this looks like a global DiskArrays setting that stays in
    # effect after this testset — confirm that later tests do not rely on
    # scalar indexing.
    DiskArrays.allowscalar(false)
    ds = GRIBDataset(grib_path)
    # CFVariable is not a disk array, so it will be super slow here.
    # But the underlying variable is one.
    var = ds[varstring].var
    @test DiskArrays.isdisk(var)
    # Currently just one huge chunk
    @test length(DiskArrays.eachchunk(var)) == 1
    # Broadcasts are lazy
    B = var .* 10
    @test B isa DiskArrays.BroadcastDiskArray
    @test B[1:50, 1:50, 1, 1, 1] isa Matrix
    # Reduction is chunked. The two sides round differently (scale after the
    # sum vs. scale every element before the sum), so compare with `≈` rather
    # than exact floating-point equality.
    @test sum(var) * 10 ≈ sum(B)
end
Loading