-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathioapi.r
437 lines (394 loc) · 16 KB
/
ioapi.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# convenience methods for IOAPI files
library(ncdf4)
# get plot-relevant global attributes from IOAPI file, return list
get.plot.addrs.from.IOAPI <- function(
ioapi.file # class=ncdf4 from nc_open(...)
) {
# Note: presumes lengths in meters!
x.orig.m <- ncatt_get(ioapi.file, varid=0, attname="XORIG")$value
x.orig.km <- x.orig.m/1000
y.orig.m <- ncatt_get(ioapi.file, varid=0, attname="YORIG")$value
y.orig.km <- y.orig.m/1000
x.cell.size.m <- ncatt_get(ioapi.file, varid=0, attname="XCELL")$value
x.cell.size.km <- x.cell.size.m/1000
y.cell.size.m <- ncatt_get(ioapi.file, varid=0, attname="YCELL")$value
y.cell.size.km <- y.cell.size.m/1000
x.n.grid <- ncatt_get(ioapi.file, varid=0, attname="NCOLS")$value
y.n.grid <- ncatt_get(ioapi.file, varid=0, attname="NROWS")$value
attrs.list <- list()
attrs.list$x.cell.centers.km <-
seq(from=x.orig.km + x.cell.size.km/2, length=x.n.grid, by=x.cell.size.km)
attrs.list$y.cell.centers.km <-
seq(from=y.orig.km + y.cell.size.km/2, length=y.n.grid, by=y.cell.size.km)
# start debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
# print(paste('class(x.cell.centers.km)==', class(attrs.list$x.cell.centers.km), sep=""))
# print(paste('class(y.cell.centers.km)==', class(attrs.list$y.cell.centers.km), sep=""))
# end debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
attrs.list
} # end function get.plot.addrs.from.IOAPI
# if value of IOAPI global attribute with name given does not match
# desired value, make it so
fix.IOAPI.global.attribute <- function(
ioapi.file, # class=ncdf4 from nc_open(...)
attr.name, # string name of global attribute
value.correct
) {
# TODO: only call once, cache list(foo$hasatt, foo$value)
if (ncatt_get(ioapi.file, varid=0, attname=attr.name)$hasatt) {
value.current <- ncatt_get(ioapi.file, varid=0, attname=attr.name)$value
# start debugging
# this dies with error: cat(sprintf('%s : value.current=%i, value.correct=%i\n', attr.name, value.current, value.correct))
# cat(sprintf('%s : value.current=%f, value.correct=%f\n', attr.name, value.current, value.correct))
# end debugging
# this is not sufficient for, e.g., vector testing:
# if (value.current != value.correct) {
# gets warning message that
# > the condition has length > 1 and only the first element will be used
# so the test succeeds when it should fail.
if (
(class(value.current) != class(value.correct)) ||
(length(value.current) != length(value.correct)) ||
(value.current != value.correct)
) {
ncatt_put(ioapi.file, varid=0, attname=attr.name, attval=value.correct,
verbose=FALSE)
}
} # TODO: else, throw error (on not having the attribute)
# return
ioapi.file
} # end function fix.IOAPI.global.attribute
# if IOAPI global attribute name=NLAYS does not match actual layer count,
# make it so
fix.NLAYS <- function(
ioapi.file, # class=ncdf4 from nc_open(...)
nlays.correct # value
) {
ioapi.file <- fix.IOAPI.global.attribute(
ioapi.file, attr.name="NLAYS", value.correct=nlays.correct)
# return
ioapi.file
} # end function fix.NLAYS
# if IOAPI global attribute name=NVARS does not match actual variable count,
# make it so
fix.NVARS <- function(
ioapi.file, # class=ncdf4 from nc_open(...)
nvars.correct # value
) {
ioapi.file <- fix.IOAPI.global.attribute(
ioapi.file, attr.name="NVARS", value.correct=nvars.correct)
# return
ioapi.file
} # end function fix.NVARS
# if IOAPI global attribute name=VAR-LIST does not contain the "real" variable name(s)
# (minus the IOAPI accounting variable=TFLAG), make it so
fix.VARdashLIST <- function(
source.file, # class=ncdf4 from nc_open(...)
names.vec, # character vector of names of datavars we want to keep,
# in the order we want to keep them!
target.file
) {
# TODO: more testing of inputs
stopifnot(!is.element("TFLAG", names.vec)) # ASSERT: TFLAG not a member
correct.names.string <- ""
for (name in names.vec) {
# IOAPI wants names with spaces appended to length=16
# correct.name <- sprintf("%-16s", name)
# correct.names.string <- correct.names.string + correct.name
correct.names.string <- sprintf("%s%-16s", correct.names.string, name)
}
target.file <- fix.IOAPI.global.attribute(
target.file, attr.name="VAR-LIST", value.correct=correct.names.string)
# return
target.file
} # end function fix.VARdashLIST
# Note VGLVLS seems to want to have
# * length(VGLVLS) == n(layers) +1
# * first member == 1.0 (or "1.f" in `ncdump`)
create.VGLVLS <- function(
layers.n # number of layers the created vector should reflect
) {
# constants we'll use in calculation, suggested by values in 5yravg.test.nc:VGLVLS
vglvls.first <- 1.0
vglvls.from <- 0.1
vglvls.by <- 0.01
correct.VGLVLS <- double(0)
# seq(from, to, by) -> ((to - from) / by) +1 == length(seq) == layers.n
# (to - from) / by == layers.n - 1
# (to - from) == by * (layers.n - 1)
# to == (by * (layers.n - 1)) + from
if (layers.n < 1) {
stop(sprintf('ioapi.r: (layers.n==%i) < 1\n',
layers.n))
} else if (layers.n == 1) {
correct.VGLVLS <- c(vglvls.first, vglvls.from)
} else { # layers.n > 1
vglvls.to <- (vglvls.by * (layers.n - 1)) + vglvls.from
correct.VGLVLS <- c(vglvls.first,
seq(from=vglvls.from, to=vglvls.to, by=vglvls.by))
# start debugging
# cat(sprintf('ioapi.r:create.VGLVLS: from==%f, to==%f, by==%f\n',
# vglvls.from, vglvls.to, vglvls.by))
# print(' correct.VGLVLS==')
# print(correct.VGLVLS)
# end debugging
} # end testing layers.n
# debugging
# print('ioapi.r:create.VGLVLS: correct.VGLVLS==') ; print(correct.VGLVLS)
if (length(correct.VGLVLS) != (layers.n +1)) {
stop(sprintf('ioapi.r: (length(correct.VGLVLS)==%i) != ((layers.n +1)==%i)\ncorrect.VGLVLS==%s\n',
length(correct.VGLVLS), (layers.n +1), correct.VGLVLS))
}
# else, return
correct.VGLVLS
} # end function create.VGLVLS
# testcases for create.VGLVLS:
# > source('./ioapi.r')
# > create.VGLVLS(-1)
# Error in create.VGLVLS(-1) : ioapi.r: (layers.n==-1) < 1
# > create.VGLVLS(0)
# Error in create.VGLVLS(0) : ioapi.r: (layers.n==0) < 1
# > create.VGLVLS(1)
# [1] 1.0 0.1
# > create.VGLVLS(2)
# [1] 1.00 0.10 0.11
# > create.VGLVLS(3)
# [1] 1.00 0.10 0.11 0.12
# > length(create.VGLVLS(42))
# [1] 43
# if IOAPI global attribute name=VGLVLS does not contain as many members
# as we think it should, make it so
fix.VGLVLS <- function(
source.file, # class=ncdf4 from nc_open(...)
layers.n, # number of layers this vector should reflect
target.file
) {
# TODO: more testing of inputs
target.file <- fix.IOAPI.global.attribute(
target.file, attr.name="VGLVLS", value.correct=create.VGLVLS(layers.n))
# return
target.file
} # end function fix.VGLVLS
# if NCO does not copy the FillValue from the source datavar, make it so
fix.FillValue <- function(
source.file, # class=ncdf4 from nc_open(...)
names.vec, # character vector of names of datavars we want to keep,
# in the order we want to keep them!
target.file
) {
# TODO: more testing of inputs
stopifnot(!is.element("TFLAG", names.vec)) # ASSERT: TFLAG not a member
for (name in names.vec) {
# check return types: doc unclear
FillValue <- ncatt_get(source.file, varid=datavar.name, attname="_FillValue", verbose=FALSE)
target.file <- fix.IOAPI.global.attribute(
target.file, attr.name="_FillValue", value.correct=FillValue)
}
# return
target.file
} # end function fix.FillValue
# The following function assumes that target.file has been created by
# 1 cp source.file target.file
# 2 removing data variables from target.file
# That works for netCDF, but not for I/O API. Among other things,
# IOAPI requires adjustments to its special "datavar""=TFLAG,
# which this function provides.
# IOAPI "datavar"=TFLAG has structure like:
# in `ncdump -h`:
# > dimensions:
# > TSTEP = UNLIMITED ; // (1 currently)
# > DATE-TIME = 2 ;
# > LAY = 42 ;
# > VAR = 29 ;
# > ROW = 299 ;
# > COL = 459 ;
# > variables:
# > int TFLAG(TSTEP, VAR, DATE-TIME) ;
# > TFLAG:units = "<YYYYDDD,HHMMSS>" ;
# > TFLAG:long_name = "TFLAG " ;
# > TFLAG:var_desc = "Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS " ;
# in source.file$var[["TFLAG"]]:
# > attr(,"class")
# > [1] "ncid4"
#
# > $name
# > [1] "TFLAG"
#
# > $ndims
# > [1] 3
#
# > $natts
# > [1] 3
#
# > $size
# > [1] 2 29 1
# ==DATE-TIME, VAR, TSTEP
# TODO: handle missing values explicitly!
fix.TFLAG <- function(
source.file, # class=ncdf4 from nc_open(...)
names.vec, # character vector of names of datavars we want to keep,
# in the order we want to keep them! NOT including "TFLAG"
timesteps.n, # number of timesteps, should be same for all datavars
target.file,
target.fp # kludge: debugging dies due to suicidal target.file?
) {
# TODO: more testing of inputs
stopifnot(!is.element("TFLAG", names.vec)) # ASSERT: TFLAG not a member
source.datavar <- ncvar_get(source.file, varid="TFLAG")
target.datavar <- ncvar_get(target.file, varid="TFLAG")
# get indices of the datavars we want to keep, in source order
source.datavar.names <- get.datavar.names.in.order(source.file)
# # source.datavar.names includes datavar=TFLAG, which is *not* in itself!
# # i.e., not in either *.datavar. So ...
# stopifnot(source.datavar.names[1] == "TFLAG") # ASSERT
# datavar.names <- source.datavar.names[2:(length(source.datavar.names))]
datavar.names <- source.datavar.names
# ... and from that get the indices of the datavars we want to keep
source.indices.vec <- numeric(0)
# start debugging
# cat(sprintf('fix.TFLAG:\n'))
# cat(sprintf('\tnames.vec=')) ; print(names.vec)
# cat(sprintf('\tdatavar.names=')) ; print(datavar.names)
# end debugging
for (name in names.vec) {
source.indices.vec[length(source.indices.vec) +1] <-
which(datavar.names == name, arr.ind = TRUE)
}
stopifnot(length(names.vec) == length(source.indices.vec)) # ASSERT
# Now transfer the TFLAG data for the datavars we want in target.
datavar.dims.n <- length(dim(source.datavar))
datavar.datetimes.n <- dim(source.datavar)[1]
source.datavar.vars.n <- dim(source.datavar)[2]
target.datavar.vars.n <- length(names.vec)
# Following are used in reading one timestep at a time
# (Pierce-style read: see help(ncvar_get)#Examples)
start <- rep(1,datavar.dims.n) # start=(1,1,1,...)
# Remember timelike dim is always the LAST dimension!
# for TFLAG: DATE-TIME, VAR, TSTEP
count.source.raw <- dim(source.datavar)
count.target.raw <- dim(target.datavar)
count.target.raw[2] <- target.datavar.vars.n
# but if val=1, timestep index is omitted from dim(source.datavar),
# breaking Pierce-style read (below)
if (datavar.dims.n < 2) {
# TODO: throw
cat(sprintf('ERROR: ioapi.r: datavar.dims.n==%i\n', datavar.dims.n))
} else if (datavar.dims.n == 2) {
count.source <- c(count.source.raw, timesteps.n)
count.target <- c(count.target.raw, timesteps.n)
start <- c(start, timesteps.n)
datavar.dims.max.vec <- count.source
datavar.dims.n <- 3
} else if (datavar.dims.n == 3) {
count.source <- count.source.raw
datavar.dims.max.vec <- count.source
count.target <- count.target.raw
} else {
# TODO: throw
cat(sprintf('ERROR: ioapi.r: datavar.dims.n==%i\n', datavar.dims.n))
}
# start debugging
#print('fix.TFLAG: count.source==')
#print(count.source)
#print('fix.TFLAG: count.target==')
#print(count.target)
# end debugging
# for safety (and pedagogy), read in data one timestep at a time, dim-agnostically
for (i.timestep in 1:timesteps.n) {
#i.timestep <- 1
# Initialize start and count.source to read one timestep of the source datavar.
# start=(1,1,i), source.count=(DATE-TIME,VAR,i)
start[datavar.dims.n] <- i.timestep
count.source[datavar.dims.n] <- i.timestep
count.target[datavar.dims.n] <- i.timestep
# start debugging
#cat(sprintf('fix.TFLAG: for timestep==%i, count.source==\n', i.timestep))
#print(count.source)
#cat(sprintf('fix.TFLAG: for timestep==%i, count.target==\n', i.timestep))
#print(count.target)
# end debugging
# source.timestep <- ncvar_get(source.file, varid=source.datavar, start=start, count=count.source)
# should work, but class(source.datavar) == "matrix" ???
source.timestep <- ncvar_get(source.file, varid="TFLAG", start=start, count=count.source)
# read same size (count.source) in target, later truncating
# target.timestep.raw <- ncvar_get(target.file, varid=target.datavar, start=start, count=count.source)
target.timestep.raw <- ncvar_get(target.file, varid="TFLAG", start=start, count=count.source)
# start debugging
#cat(sprintf('fix.TFLAG: for timestep==%i, target.timestep.raw==\n', i.timestep))
#print(target.timestep.raw)
# end debugging
# Now "transpose" the appropriate values for the datavars we want:
# e.g., datavar that was 17th in source is 1st in target.
# Iterating over length(names.vec):
for (i.name.target in 1:target.datavar.vars.n) {
i.name.source <- source.indices.vec[i.name.target]
source.datetime <- source.timestep[,i.name.source]
target.timestep.raw[,i.name.target] <- source.datetime
}
# now write back new'n'improved (and resized) target.timestep
target.timestep <- target.timestep.raw
# dim(target.timestep) <- count.target
# count.target is 3d, but a single timestep is 2d
# dim(target.timestep) <- count.target[1:2]
# can't do that either: gotta truncate the matrix manually ?!?
# TODO: find the Rish way to do this!
as.vector(target.timestep) -> target.timestep.vec
length(target.timestep.vec) <- 2
dim(target.timestep.vec) <- c(2,1)
stopifnot(is.matrix(target.timestep.vec)) # ASSERT
target.timestep <- target.timestep.vec
# start debugging
cat(sprintf('fix.TFLAG: for timestep==%i, target.timestep==\n', i.timestep))
print(target.timestep)
# end debugging
# ncvar_put(target.file, varid=target.datavar, vals=target.timestep, start=start, count=count.target)
# ncvar_put(target.file, varid="TFLAG", vals=target.timestep, start=start, count=count.target)
target.file <- ncvar_put(target.file, varid="TFLAG", vals=target.timestep, start=start, count=count.target)
} # end for timestep
# start debugging
# something's happening to target.file before next call, so kludge :-(
cat(sprintf('fix.TFLAG: kludging target.file from target.fp==%s\n', target.fp))
target.file <- nc_open(target.fp, write=FALSE, readunlim=FALSE)
target.datavar <- ncvar_get(target.file, varid="TFLAG")
print('fix.TFLAG: target.datavar==')
print(target.datavar)
print('fix.TFLAG: exiting')
# end debugging
# return
target.file
} # end function fix.TFLAG
overwrite.layer.in.timestep <- function(
timestep, # to which the layer belongs
value.to.write=NA, # e.g.
dims.vec=dim(timestep), # assumed to be COL, ROW, LAY
i.lay=dim(timestep)[3] # index of layer to overwrite
) {
# start debugging
# cat(sprintf('overwrite.layer.in.timestep: layer to fix==%i\n', i.lay))
# cat(sprintf('overwrite.layer.in.timestep: start: sum(!is.na(layer))==%i\n',
# sum(!is.na(timestep[,,i.lay]))))
# print('overwrite.layer.in.timestep: timestep dimensions==')
# print(dims.vec)
# end debugging
layer.dims <- dims.vec[1:2] # LAY assumed to be 3rd dimension
layer <- rep(value.to.write, prod(layer.dims))
dim(layer) <- layer.dims
timestep[,,i.lay] <- layer
# debugging
# cat(sprintf('overwrite.layer.in.timestep: end: sum(!is.na(layer))==%i\n',
# sum(!is.na(timestep[,,i.lay]))))
# note: explicit `return` halts execution!
timestep
} # end function overwrite.layer.in.timestep
# get the names of the datavars defined in the file (including TFLAG),
# in the order in which they are defined, return as character vector
get.datavar.names.in.order <- function(
ioapi.file # class=ncdf4 from nc_open(...)
) {
vars.n <- ioapi.file$nvars
ret.vec <- character(0)
for (i.var in 1:vars.n) {
ret.vec[length(ret.vec) +1] <- ioapi.file$var[[ i.var ]]$name
}
# return
ret.vec
}