-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbashUtilities.sh
365 lines (337 loc) · 12.3 KB
/
bashUtilities.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# description---------------------------------------------------------
# Source me
# code----------------------------------------------------------------
# constants-----------------------------------------------------------
# TODO: take switches for help, debugging, no/eval, target drive
# Following does not work with `source`
# THIS="$0"
# THIS_FN="$(basename ${THIS})"
# THIS_DIR="$(dirname ${THIS})"
# Hard-coded location of this file; THIS_FN and THIS_DIR are derived from it.
THIS="${HOME}/ioapi-hack-R/bashUtilities.sh"
# The expansions are quoted so that a HOME containing whitespace reaches
# basename/dirname as one argument instead of being word-split.
THIS_FN="$(basename "${THIS}")"
THIS_DIR="$(dirname "${THIS}")"
# debugging: get commandline==all positional parameters
#echo -e "hostname=$(hostname): cmdline='${@}', THIS_DIR='${THIS_DIR}', THIS_FN='${THIS_FN}'"
# TODO: read from CCTM Makefile
IOAPI_VERSION="3.1" # desired
NCO_VERSION="4.0.5" # version on terrae; infinity has 4.0.8
HPCC_R_PATH="/share/linux86_64/bin"
# `ncdump` now on hpcc in /usr/bin
#HPCC_NCDUMP_PATH="/share/linux86_64/grads/supplibs-2.2.0/x86_64-unknown-linux-gnu/bin"
# IOAPI libraries and executables share one directory on hpcc.
HPCC_IOAPI_LIB_PATH="/project/air5/roche/CMAQ-5-eval/lib/ioapi_${IOAPI_VERSION}"
HPCC_IOAPI_BIN_PATH="${HPCC_IOAPI_LIB_PATH}"
HPCC_NCO_PATH="/share/linux86_64/nco/nco-${NCO_VERSION}/bin"
# Module names handed to `modulecmd` by setupModules.
TERRAE_IOAPI_MODULE="ioapi-${IOAPI_VERSION}"
TERRAE_NCO_MODULE="nco-${NCO_VERSION}" # in `module avail` as of May 2012
# this fixes removed vars, and dims and global attributes that must reflect them
FIX_VARS_SCRIPT="${THIS_DIR}/processVars.r"
# functions-----------------------------------------------------------
# Make the IOAPI, NCO and R tools reachable, choosing the method from the
# prefix of this machine's hostname:
#   terra*            -> setupModules (Environment Modules)
#   amad*, global*    -> add IOAPI bin, R bin and the IOAPI library directory
#   imaster*, inode*  -> print a note, then add the NCO bin directory plus
#                        everything the amad/global hosts get
#   anything else     -> print "unknown <hostname>" and change nothing
# Globals: H is set to the hostname; the HPCC_* constants are read.
# TODO: ensure your hostname matches here!
# TODO: setup packages={ncdf4} on infinity, not just amad
function setupPaths {
  H="$(hostname)"
  case "${H}" in
    terra*)
      # terrae: tools come from modules, no path editing needed
      setupModules
      return
      ;;
    amad*|global*)
      # hpcc. As of 22 May 12, on the hpcc R servers NCO is installed
      # normally, in /usr/bin, so HPCC_NCO_PATH is not added here.
      ;;
    imaster*|inode*) # imaster == infinity, inode == node39
      echo -e "For R packages such as ncdf4, must run on amad"
      addPath "${HPCC_NCO_PATH}"
      ;;
    *)
      echo -e "unknown ${H}"
      # exit 1
      return
      ;;
  esac
  # Common to every recognised hpcc host.
  addPath "${HPCC_IOAPI_BIN_PATH}"
  addPath "${HPCC_R_PATH}"
  addLdLibraryPath "${HPCC_IOAPI_LIB_PATH}"
} # end function setupPaths
# Prepend directory $1 to PATH unless it is already one of its elements.
# Arguments: $1 - directory to add
# Globals:   PATH (read, written), DIR (set to $1), THIS_FN (read for messages)
# Outputs:   a note on stdout when PATH already contains the directory;
#            an ERROR line on stderr when $1 is empty or not a directory
function addPath {
  DIR="$1"
  if [[ -n "${DIR}" ]] ; then
    if [ -d "${DIR}" ] ; then
      if [[ ":${PATH:-}:" != *":${DIR}:"* ]] ; then
        # Only emit the separator when PATH already has content: a trailing
        # ':' would add an empty element, which means "current directory".
        PATH="${DIR}${PATH:+:${PATH}}"
      else
        echo -e "PATH contains '${DIR}'"
      fi
    else
      echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: '${DIR}' is not a directory" 1>&2
    fi
  else
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: DIR not defined" 1>&2
  fi
}
# Prepend directory $1 to LD_LIBRARY_PATH unless it is already one of its
# elements, and export the result so programs started from this shell see it.
# Arguments: $1 - directory to add
# Globals:   LD_LIBRARY_PATH (read, written, exported), DIR (set to $1),
#            THIS_FN (read for messages)
# Outputs:   a note on stdout when the directory is already present;
#            an ERROR line on stderr when $1 is empty or not a directory
function addLdLibraryPath {
  DIR="$1"
  if [[ -n "${DIR}" ]] ; then
    if [ -d "${DIR}" ] ; then
      if [[ ":${LD_LIBRARY_PATH:-}:" != *":${DIR}:"* ]] ; then
        # LD_LIBRARY_PATH is commonly unset. Only emit the separator when it
        # has content: a trailing ':' adds an empty element, which the
        # dynamic loader treats as the current directory.
        LD_LIBRARY_PATH="${DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
        # Without the export a previously-unset variable stays shell-local
        # and child processes would never see the new directory.
        export LD_LIBRARY_PATH
      else
        echo -e "LD_LIBRARY_PATH contains '${DIR}'"
      fi
    else
      echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: '${DIR}' is not a directory" 1>&2
    fi
  else
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: DIR not defined" 1>&2
  fi
}
# If your computing platform uses Environment Modules (
# http://modules.sourceforge.net/
# ), load modules for current NCO and IOAPI, noting
# how this syntax differs from the commandline.
# (Thanks, Barron Henderson for noting this.)
# `modulecmd bash add ...` prints shell code on stdout; that output is written
# to a temporary file, sourced into the current shell, and the file is then
# removed.
# Globals: TERRAE_NCO_MODULE, TERRAE_IOAPI_MODULE (read); TEMPFILE (written)
# Returns: the status of sourcing the generated code; 1 if no temporary file
#          could be created
# TODO: test for non/existence of paths above!
function setupModules {
  # for CMD in \
  # "modulecmd bash add ${TERRAE_NCO_MODULE} ${TERRAE_IOAPI_MODULE}" \
  # ; do
  # echo -e "$ ${CMD}"
  # eval "${CMD}"
  # done
  TEMPFILE="$(mktemp)" || {
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: cannot create temporary file" 1>&2
    return 1
  }
  modulecmd bash add "${TERRAE_NCO_MODULE}" "${TERRAE_IOAPI_MODULE}" > "${TEMPFILE}"
  source "${TEMPFILE}"
  local source_status=$?
  # The generated code is only needed once; do not leave it behind.
  rm -f -- "${TEMPFILE}"
  return "${source_status}"
}
# Window a single IOAPI file. Convenience for callers.
# Arguments: $1 - input file path  (exported as INFP)
#            $2 - output file path (exported as OUFP)
#            $3 - file fed to m3wndw on stdin
# Outputs:   each command is echoed as "$ <command>" before it runs,
#            followed by that command's own output
# CONTRACT:
# * arguments are not checked here, must be checked by callers
# * m3tools/m3wndw must be in path
# * INFP != OUFP: m3wndw wants separate handles
function windowFile {
  # EMPIRICAL NOTE:
  # m3wndw (perhaps all of m3tools) truncate envvars @ length=16!
  # e.g., "M3WNDW_INPUT_FILE" -> "M3WNDW_INPUT_FIL", which fails lookup.
  # ASSERT: good arguments, must be tested by caller
  export INFP="$1"
  export OUFP="$2"
  M3WNDW_INPUT_FP="$3"
  # INFP, OUFP are handles for `m3wndw`: don't substitute in shell!
  # The command strings below are eval'ed, so every path is shell-quoted
  # first; whitespace or metacharacters in a path would otherwise be
  # re-parsed and break the redirect or split the arguments. Ordinary
  # paths are left unchanged by %q.
  local in_q out_q ctl_q
  printf -v in_q '%q' "${INFP}"
  printf -v out_q '%q' "${OUFP}"
  printf -v ctl_q '%q' "${M3WNDW_INPUT_FP}"
  # start debugging
  # echo -e "windowFile: about to call m3wndw with INFP='${INFP}', OUFP='${OUFP}', M3WNDW_INPUT_FP='${M3WNDW_INPUT_FP}'"
  # end debugging
  for CMD in \
    "ls -alt ${in_q} ${out_q} ${ctl_q}" \
    "m3wndw INFP OUFP < ${ctl_q}" \
    "ls -alt ${in_q} ${out_q}" \
    "ncdump -h ${out_q} | head -n 20" \
  ; do
    # printf, not `echo -e`: the quoting backslashes must be shown verbatim.
    printf '$ %s\n' "${CMD}"
    eval "${CMD}"
  done
} # end function windowFile
# Remove all netCDF datavars other than the one named by VAR_NAME.
# For IOAPI, subsequently gotta fix
# * global attr=VAR-LIST
# * coordinate var=VAR
# * data var=TFLAG
# Note I copy files to output, *then* work on them, because that's what
# R package=ncdf4 seems to want.
# Arguments: $1 - name of the datavar to keep (TFLAG is kept as well)
#            $2 - input netCDF file
#            $3 - output netCDF file
# Steps:     ncks writes an intermediate "<input name>_stripped.<suffix>"
#            file into the output's directory, that file is copied to the
#            output, FIX_VARS_SCRIPT is run over both via R CMD BATCH, and
#            the intermediate file is removed. Each step is echoed as
#            "$ <command>" before it runs. M3STAT_FILE is exported as the
#            output path.
# Exits:     1 if the output exists and is readable but not writable;
#            2 if FIX_VARS_SCRIPT is not readable
# CONTRACT:
# * arguments are not checked here, must be checked by callers
# * nco/ncks must be in path
# * if output path already exists, it gets overwritten
function stripOtherDatavars {
  VAR_NAME="$1"
  INPUT_FP="$2"
  OUTPUT_FP="$3"

  # An existing output is tolerated only if it can be overwritten.
  if [[ -r "${OUTPUT_FP}" ]] ; then
    if [[ ! -w "${OUTPUT_FP}" ]] ; then
      echo -e "ERROR: ${FUNCNAME[0]}: output file='${OUTPUT_FP}' exists but can't be written" 1>&2
      exit 1
    fi
    DEBUG echo -e "ERROR? ${FUNCNAME[0]}: output file='${OUTPUT_FP}' already exists"
  fi

  # Nothing below can work without the R fix-up script.
  if [[ ! -r "${FIX_VARS_SCRIPT}" ]] ; then
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: script='${FIX_VARS_SCRIPT}' is not readable" 1>&2
    exit 2
  fi

  TEMPFILE="$(mktemp)" # for R output
  INPUT_FN="$(basename ${INPUT_FP})"
  INPUT_PREFIX="${INPUT_FN%.*}"
  INPUT_SUFFIX="${INPUT_FN##*.}"
  OUTPUT_DIR="$(dirname ${OUTPUT_FP})"
  RAW_STRIPPED_FP="${OUTPUT_DIR}/${INPUT_PREFIX}_stripped.${INPUT_SUFFIX}"
  DEBUG echo -e "INPUT_PREFIX='${INPUT_PREFIX}'"
  DEBUG echo -e "INPUT_SUFFIX='${INPUT_SUFFIX}'"
  DEBUG echo -e "RAW_STRIPPED_FP='${RAW_STRIPPED_FP}'"

  # gotta quote the double quotes :-(
  # need INPUT_FP to get original TFLAG?
  # TODO: should "cat ${TEMPFILE}" after running R, but ${FIX_VARS_SCRIPT} is too verbose
  # The R step is kept on one line so the single-quoted --args word carries
  # exactly one space between its fields and one trailing space.
  local -a pipeline=(
    "ncks -O -v ${VAR_NAME},TFLAG ${INPUT_FP} ${RAW_STRIPPED_FP}"
    "cp ${RAW_STRIPPED_FP} ${OUTPUT_FP}"
    "R CMD BATCH --vanilla --slave '--args datavar.name=\"${VAR_NAME}\" epic.input.fp=\"${RAW_STRIPPED_FP}\" epic.output.fp=\"${OUTPUT_FP}\" ' ${FIX_VARS_SCRIPT} ${TEMPFILE}"
    "rm ${RAW_STRIPPED_FP}"
  )
  for CMD in "${pipeline[@]}" ; do
    echo -e "$ ${CMD}"
    eval "${CMD}"
  done
  # ncdump -v TFLAG ${OUTPUT_FP}
  export M3STAT_FILE="${OUTPUT_FP}"
} # end function stripOtherDatavars
# Rename datavar INPUT_VAR_NAME to OUTPUT_VAR_NAME.
# For IOAPI, subsequently gotta fix
# * global attr=VAR-LIST
# * coordinate var=VAR
# * data var=TFLAG
# Note I copy files to output, *then* work on them, because that's what
# R package=ncdf4 seems to want.
# Arguments: $1 - current datavar name
#            $2 - new datavar name
#            $3 - netCDF file, both input and output
# Steps:     ncrename writes an intermediate "<name>_renamed.<suffix>" file
#            beside the netCDF file, that file is copied back over the
#            netCDF file, FIX_VARS_SCRIPT is run over both via R CMD BATCH,
#            and the intermediate file is removed. Each step is echoed to
#            stderr before it runs. M3STAT_FILE is exported as the netCDF
#            file path.
# Exits:     2 if FIX_VARS_SCRIPT is not readable
# CONTRACT:
# * arguments are not checked here, must be checked by callers
# * nco/ncrename must be in path
function renameDatavar {
  INPUT_VAR_NAME="$1"
  OUTPUT_VAR_NAME="$2"
  NETCDF_FP="$3" # both input and output
  if [[ -r "${FIX_VARS_SCRIPT}" ]] ; then
    TEMPFILE="$(mktemp)" # for R output
    OUTPUT_DIR="$(dirname ${NETCDF_FP})"
    INPUT_FN="$(basename ${NETCDF_FP})"
    INPUT_PREFIX="${INPUT_FN%.*}"
    INPUT_SUFFIX="${INPUT_FN##*.}"
    RAW_RENAMED_FP="${OUTPUT_DIR}/${INPUT_PREFIX}_renamed.${INPUT_SUFFIX}"
    DEBUG echo -e "INPUT_PREFIX='${INPUT_PREFIX}'"
    DEBUG echo -e "INPUT_SUFFIX='${INPUT_SUFFIX}'"
    DEBUG echo -e "RAW_RENAMED_FP='${RAW_RENAMED_FP}'"
    # gotta quote the double quotes :-(
    # gotta create extra file for R?
    # TODO: should "cat ${TEMPFILE}" after running R, but ${FIX_VARS_SCRIPT} is too verbose
    # Every step uses RAW_RENAMED_FP, the intermediate path computed above.
    # RAW_STRIPPED_FP belongs to stripOtherDatavars and is unset or stale
    # here. The R step is kept on one line so the single-quoted --args word
    # gets no extra whitespace from indentation.
    for CMD in \
      "ncrename -O -v ${INPUT_VAR_NAME},${OUTPUT_VAR_NAME} ${NETCDF_FP} ${RAW_RENAMED_FP}" \
      "cp ${RAW_RENAMED_FP} ${NETCDF_FP}" \
      "R CMD BATCH --vanilla --slave '--args datavar.name=\"${OUTPUT_VAR_NAME}\" epic.input.fp=\"${RAW_RENAMED_FP}\" epic.output.fp=\"${NETCDF_FP}\" ' ${FIX_VARS_SCRIPT} ${TEMPFILE}" \
      "rm ${RAW_RENAMED_FP}" \
    ; do
      # but only if the first word is a command?
      # no: `ncrename` is there, but we're not seeing it :-(
      # if [[ -n "$(declare -f ${CMD%% *})" ]] ; then
      echo -e "$ ${FUNCNAME[0]}:${CMD}" 1>&2
      eval "${CMD}"
      # else
      # echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: command='${CMD%% *}' not defined, stopping"
      # exit 1
      # fi # end testing commands
    done
    # The file was renamed in place, so the result is NETCDF_FP; this
    # function has no OUTPUT_FP of its own.
    export M3STAT_FILE="${NETCDF_FP}"
  else
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: script='${FIX_VARS_SCRIPT}' is not readable" 1>&2
    exit 2
  fi # end testing -r "${FIX_VARS_SCRIPT}"
} # end function renameDatavar
# "Comments" lines from running iff _DEBUG='on' (which can be export'ed by caller),
# and runs with `set xtrace`
# For `echo`, use DEBUG()
function DEBUGx {
  # Run "$@" with xtrace enabled, sending its stdout to stderr, but only
  # when _DEBUG is 'on'; otherwise do nothing.
  # Arguments: the command to run and its arguments
  # Returns:   0 (the command's own status is not propagated)
  if [[ "${_DEBUG:-}" == 'on' ]] ; then
    # Shell options are global, so remember whether the caller already had
    # xtrace on and only switch it off again if this function enabled it.
    local xtrace_was_on=''
    if [[ "$-" == *x* ]] ; then
      xtrace_was_on='yes'
    fi
    set -x
    "$@" 1>&2
    if [[ -z "${xtrace_was_on}" ]] ; then
      set +x
    fi
  fi
} # end function DEBUGx
# "Comments" lines from running iff _DEBUG='on'
# (which can be export'ed by caller)
function DEBUG {
  # Run "$@" with its stdout sent to stderr when _DEBUG is exactly 'on';
  # any other value (or none) makes this a no-op returning 0.
  case "${_DEBUG}" in
    on)
      "$@" 1>&2
      ;;
  esac
} # end function DEBUG
# Stop if cannot find datavar in file. kludged implementation!
# The `ncdump -h` header of the file is searched for a line containing both
# "<var name>:<attribute name>" and the attribute value, as fixed strings.
# Arguments: $1 - netCDF file
#            $2 - datavar name
#            $3 - name of an attribute of that datavar
#            $4 - text expected in that attribute's line; an empty value
#                 matches any line
# Exits:     1, with a message on stderr, when no line matches
# CONTRACT: arguments tested by caller
function exitIfDatavarNotFound {
  NETCDF_FP="$1"
  VAR_NAME="$2"
  VAR_ATTR_NAME="$3"
  VAR_ATTR_VAL="$4"
  KEY_NAME="${VAR_NAME}:${VAR_ATTR_NAME}"
  # Patterns are double-quoted so a value containing whitespace stays one
  # pattern instead of its later words being taken as file names.
  # `grep -F` replaces the obsolescent `fgrep`.
  SEARCH_RESULTS="$(ncdump -h "${NETCDF_FP}" | grep -F -e "${KEY_NAME}" | grep -F -e "${VAR_ATTR_VAL}")"
  if [[ -z "${SEARCH_RESULTS}" ]] ; then
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: could not find varname='${VAR_NAME}' in netCDF file='${NETCDF_FP}'" 1>&2
    exit 1
  else
    DEBUG echo -e "${FUNCNAME[0]}: 'ncdump -h ${NETCDF_FP} | fgrep -e ${KEY_NAME} | fgrep -e ${VAR_ATTR_VAL}' found ${SEARCH_RESULTS}"
  fi
}
# Stop on finding datavar in file. kludged implementation!
# The `ncdump -h` header of the file is searched for a line containing both
# "<var name>:<attribute name>" and the attribute value, as fixed strings.
# Arguments: $1 - netCDF file
#            $2 - datavar name
#            $3 - name of an attribute of that datavar
#            $4 - text expected in that attribute's line; an empty value
#                 matches any line
# Exits:     1, with a message on stderr, when some line matches
# CONTRACT: arguments tested by caller
function exitIfDatavarIsFound {
  NETCDF_FP="$1"
  VAR_NAME="$2"
  VAR_ATTR_NAME="$3"
  VAR_ATTR_VAL="$4"
  KEY_NAME="${VAR_NAME}:${VAR_ATTR_NAME}"
  # Patterns are double-quoted so a value containing whitespace stays one
  # pattern; unquoted, its later words were taken as file names and the
  # search could never match. `grep -F` replaces the obsolescent `fgrep`.
  SEARCH_RESULTS="$(ncdump -h "${NETCDF_FP}" | grep -F -e "${KEY_NAME}" | grep -F -e "${VAR_ATTR_VAL}")"
  if [[ -n "${SEARCH_RESULTS}" ]] ; then
    echo -e "ERROR: ${THIS_FN}:${FUNCNAME[0]}: found varname='${VAR_NAME}' in netCDF file='${NETCDF_FP}'" 1>&2
    exit 1
  else
    DEBUG echo -e "${FUNCNAME[0]}: nothing found for 'ncdump -h ${NETCDF_FP} | fgrep -e ${KEY_NAME} | fgrep -e ${VAR_ATTR_VAL}'"
  fi
}
# Print the `ncdump -h` header lines of a netCDF file that mention an
# attribute. Does not stop the caller when nothing is found.
# For datavar attribute, pass datavar name in $3; else, omit or pass null string.
# Arguments: $1 - attribute name
#            $2 - netCDF file
#            $3 - datavar name (optional)
# Outputs:   matching header lines on stdout
# Returns:   the status of the final grep (non-zero when nothing matched)
# Don't use return value, rely on side effect on stdout.
# CONTRACT: dependency availability, arguments tested by caller
function findAttributeInFile {
  ATTR_NAME="$1"
  NETCDF_FP="$2"
  VAR_NAME="$3"
  # Built from ATTR_NAME, the name passed in $1 (not from the unrelated
  # global VAR_ATTR_NAME).
  KEY_NAME="${VAR_NAME}:${ATTR_NAME}" # note colon needed for `ncdump`
  # add no single quotes to search command! which is our "return value"
  # `grep -F` replaces the obsolescent `fgrep`.
  ncdump -h "${NETCDF_FP}" | grep -F -e "${KEY_NAME}"
} # end function findAttributeInFile
# Print the `ncdump -h` header lines that show a datavar is present in a
# netCDF file. kludged implementation! Does not stop the caller when
# nothing is found.
# Arguments: $1 - datavar name
#            $2 - netCDF file
#            $3 - name of an attribute of that datavar
# Don't use return value, rely on side effect on stdout.
# CONTRACT: dependency availability, arguments tested by caller
function findDatavarInFile {
  VAR_NAME="$1"
  NETCDF_FP="$2"
  # kludge: also pass the name of an attribute of the datavar, since we're only `ncdump`ing
  VAR_ATTR_NAME="$3"
  # Delegates to findAttributeInFile, whose argument order
  # (attribute, file, datavar) this call already uses; no function named
  # findDatavarAttributeInFile is defined in this file.
  findAttributeInFile "${VAR_ATTR_NAME}" "${NETCDF_FP}" "${VAR_NAME}"
} # end function findDatavarInFile