-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathKSensrf.py
309 lines (290 loc) · 11.9 KB
/
KSensrf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
"""Ensemble square-root filters for the 1-d Kuramoto-Sivashinsky eqn"""
import numpy as np
import sys
from KS import KS
from enkf import serial_ensrf, bulk_ensrf, etkf, letkf, etkf_modens,\
serial_ensrf_modens, bulk_enkf, getkf, getkf_modens
np.seterr(all='raise') # raise error when overflow occurs
if len(sys.argv) < 3:
msg="""python KSensrf.py covlocal method smoothlen covinflate1 covfinflate2
all variables are observed, assimilation interval given by dtassim,
nens ensemble members, observation error standard deviation = oberrstdev,
observation operator is smooth_len pt boxcar running mean or gaussian.
time mean error and spread stats printed to standard output.
covlocal: localization distance (distance at which Gaspari-Cohn polynomial goes
to zero).
method: =0 for serial Potter method
=1 for bulk Potter method (all obs at once)
=2 for ETKF (no localization applied)
=3 for LETKF (using observation localization)
=4 for serial Potter method with localization via modulation ensemble
=5 for ETKF with modulation ensemble
=6 for ETKF with modulation ensemble and perturbed obs
=7 for serial Potter method using sqrt of localized Pb ensemble
=8 for bulk EnKF (all obs at once) with perturbed obs.
=9 for GETKF (no localization applied)
=10 for GETKF (with modulated ensemble)
smoothlen: averaging kernel width for forward operator (0 if no averaging)
covinflate1,covinflate2: (optional) inflation parameters corresponding
to a and b in Hodyss and Campbell. If not specified, a=b=1. If covinflate2
<=0, relaxation to prior spread (RTPS) inflation used with a relaxation
coefficient equal to covinflate1."""
raise SystemExit(msg)
corrl = float(sys.argv[1])
method = int(sys.argv[2])
smooth_len = int(sys.argv[3])
use_gaussian = bool(int(sys.argv[4]))
covinflate1=1.; covinflate2=1.
if len(sys.argv) > 5:
# if covinflate2 > 0, use Hodyss and Campbell inflation,
# otherwise use RTPS inflation.
covinflate1 = float(sys.argv[5])
covinflate2 = float(sys.argv[6])
ntstart = 1000 # time steps to spin up truth run
ntimes = 81000 # ob times
nens = 5 # ensemble members
oberrstdev = 1.0; oberrvar = oberrstdev**2 # ob error
verbose = False # print error stats every time if True
dtassim = 2.0 # assimilation interval
# Gaussian or running average smoothing in H.
# for running average, smooth_len is half-width of boxcar.
# for gaussian, smooth_len is standard deviation.
thresh = 0.99 # threshold for modulated ensemble eigenvalue truncation.
# model parameters...
# for truth run
dt = 0.5; npts = 128
diffusion_truth = 1.0
# for forecast model (same as above for perfect model expt)
# for simplicity, assume dt and npts stay the same.
#diffusion = 0.9
diffusion = diffusion_truth
rstruth = np.random.RandomState(42) # fixed seed for truth run
rsens = np.random.RandomState() # varying seed for ob noise and ensemble initial conditions
# model instance for truth (nature) run
model = KS(N=npts,dt=dt,diffusion=diffusion_truth,rs=rstruth)
# mode instance for forecast ensemble
ensemble = KS(N=npts,members=nens,dt=dt,diffusion=diffusion,rs=rsens)
for nt in range(ntstart): # spinup truth run
model.advance()
# sample obs from truth, compute climo stats for model.
xx = []; tt = []
for nt in range(ntimes):
model.advance()
xx.append(model.x[0]) # single member
tt.append(float(nt)*model.dt)
xtruth = np.array(xx,np.float)
timetruth = np.array(tt,np.float)
xtruth_mean = xtruth.mean()
xprime = xtruth - xtruth_mean
xvar = np.sum(xprime**2,axis=0)/(ntimes-1)
xtruth_stdev = np.sqrt(xvar.mean())
if verbose:
print('climo for truth run:')
print('x mean =',xtruth_mean)
print('x stdev =',xtruth_stdev)
# forward operator.
# identity obs.
ndim = ensemble.n
h = np.eye(ndim)
# smoothing in forward operator
# gaussian or heaviside kernel.
if smooth_len > 0:
for j in range(ndim):
for i in range(ndim):
rr = float(i-j)
if i-j < -(ndim/2): rr = float(ndim-j+i)
if i-j > (ndim/2): rr = float(i-ndim-j)
r = np.fabs(rr)/smooth_len
if use_gaussian:
h[j,i] = np.exp(-r**2) # Gaussian
else: # running average (heaviside kernel)
if r <= 1:
h[j,i] = 1.
else:
h[j,i] = 0.
# normalize H so sum of weight is 1
h[j,:] = h[j,:]/h[j,:].sum()
obs = np.empty(xtruth.shape, xtruth.dtype)
for nt in range(xtruth.shape[0]):
obs[nt] = np.dot(h,xtruth[nt])
obs = obs + oberrstdev*rsens.standard_normal(size=obs.shape)
# spinup ensemble
ntot = xtruth.shape[0]
nspinup = ntstart
for n in range(ntstart):
ensemble.advance()
nsteps = np.int(dtassim/model.dt) # time steps in assimilation interval
if verbose:
print('ntstart, nspinup, ntot, nsteps =',ntstart,nspinup,ntot,nsteps)
if nsteps % 1 != 0:
raise(ValueError, 'assimilation interval must be an integer number of model time steps')
else:
nsteps = int(nsteps)
def ensrf(ensemble,xmean,xprime,h,obs,oberrvar,covlocal,method=1,z=None):
if method == 0: # ensrf with obs one at time
return serial_ensrf(xmean,xprime,h,obs,oberrvar,covlocal,covlocal)
elif method == 1: # ensrf with all obs at once
return bulk_ensrf(xmean,xprime,h,obs,oberrvar,covlocal)
elif method == 2: # etkf (no localization)
return etkf(xmean,xprime,h,obs,oberrvar)
elif method == 3: # letkf
return letkf(xmean,xprime,h,obs,oberrvar,covlocal)
elif method == 4: # serial ensrf using 'modulated' ensemble
return serial_ensrf_modens(xmean,xprime,h,obs,oberrvar,covlocal,z)
elif method == 5: # etkf using 'modulated' ensemble
return etkf_modens(xmean,xprime,h,obs,oberrvar,covlocal,z)
elif method == 6: # etkf using 'modulated' ensemble w/pert obs
return etkf_modens(xmean,xprime,h,obs,oberrvar,covlocal,z,rs=rsens,po=True)
elif method == 7: # serial ensrf using sqrt of localized Pb
return serial_ensrf_modens(xmean,xprime,h,obs,oberrvar,covlocal,None)
elif method == 8: # enkf with perturbed obs all at once
return bulk_enkf(xmean,xprime,h,obs,oberrvar,covlocal,rsens)
elif method == 9: # getkf with no localization
return getkf(xmean,xprime,h,obs,oberrvar)
elif method == 10: # getkf with modulated ensemble
return getkf_modens(xmean,xprime,h,obs,oberrvar,covlocal,z,rs=rsens,po=True)
else:
raise ValueError('illegal value for enkf method flag')
# define localization matrix.
covlocal = np.eye(ndim)
if corrl < 2*ndim:
for j in range(ndim):
for i in range(ndim):
rr = float(i-j)
if i-j < -(ndim/2): rr = float(ndim-j+i)
if i-j > (ndim/2): rr = float(i-ndim-j)
r = np.fabs(rr)/corrl; taper = 0.0
# Bohman taper (Gneiting 2002, doi:10.1006/jmva.2001.2056,
# equation, eq 21)
#if r < 1.:
# taper = (1.-r)*np.cos(np.pi*r) + np.sin(np.pi*r)/np.pi
# Gaspari-Cohn polynomial (Gneiting 2002, eq 23).
# Figure 3 of that paper compares Bohman and GC tapers.
rr = 2.*r
if r <= 0.5:
taper = ((( -0.25*rr +0.5 )*rr +0.625 )*rr -5.0/3.0 )*rr**2+1.
elif r > 0.5 and r < 1.:
taper = (((( rr/12.0 -0.5 )*rr +0.625 )*rr +5.0/3.0 )*rr -5.0 )*rr \
+ 4.0 - 2.0 / (3.0 * rr)
covlocal[j,i]=taper
# compute square root of covlocal
if method in [4,5,6,10]:
evals, eigs = np.linalg.eigh(covlocal)
evals = np.where(evals > 1.e-10, evals, 1.e-10)
evalsum = evals.sum(); neig = 0
frac = 0.0
while frac < thresh:
frac = evals[ndim-neig-1:ndim].sum()/evalsum
neig += 1
#print 'neig = ',neig
zz = (eigs*np.sqrt(evals/frac)).T
z = zz[ndim-neig:ndim,:]
else:
neig = 0
z = None
# run assimilation.
fcsterr = []
fcsterr1 = []
fcstsprd = []
analerr = []
analsprd = []
diverged = False
fsprdmean = np.zeros(ndim,np.float)
fsprdobmean = np.zeros(ndim,np.float)
asprdmean = np.zeros(ndim,np.float)
ferrmean = np.zeros(ndim,np.float)
aerrmean = np.zeros(ndim,np.float)
corrmean = np.zeros(ndim,np.float)
corrhmean = np.zeros(ndim,np.float)
for nassim in range(0,ntot,nsteps):
# assimilate obs
xmean = ensemble.x.mean(axis=0)
xmean_b = xmean.copy()
xprime = ensemble.x - xmean
# calculate background error, sprd stats.
ferr = (xmean - xtruth[nassim])**2
if np.isnan(ferr.mean()):
diverged = True
break
fsprd = (xprime**2).sum(axis=0)/(ensemble.members-1)
corr = (xprime.T*xprime[:,ndim//2]).sum(axis=1)/float(ensemble.members-1)
hxprime = np.dot(xprime,h)
fsprdob = (hxprime**2).sum(axis=0)/(ensemble.members-1)
corrh = (xprime.T*hxprime[:,ndim//2]).sum(axis=1)/float(ensemble.members-1)
if nassim >= nspinup:
fsprdmean = fsprdmean + fsprd
fsprdobmean = fsprdobmean + fsprdob
corrmean = corrmean + corr
corrhmean = corrhmean + corrh
ferrmean = ferrmean + ferr
fcsterr.append(ferr.mean()); fcstsprd.append(fsprd.mean())
fcsterr1.append(xmean - xtruth[nassim])
# update state estimate.
xmean,xprime =\
ensrf(ensemble,xmean,xprime,h,obs[nassim,:],oberrvar,covlocal,method=method,z=z)
# calculate analysis error, sprd stats.
aerr = (xmean - xtruth[nassim])**2
asprd = (xprime**2).sum(axis=0)/(ensemble.members-1)
if nassim >= nspinup:
asprdmean = asprdmean + asprd
aerrmean = aerrmean + aerr
analerr.append(aerr.mean()); analsprd.append(asprd.mean())
if verbose:
print(nassim,timetruth[nassim],np.sqrt(ferr.mean()),np.sqrt(fsprd.mean()),np.sqrt(aerr.mean()),np.sqrt(asprd.mean()))
if covinflate2 > 0:
# Hodyss and Campbell inflation.
inc = xmean - xmean_b
inf_fact = np.sqrt(covinflate1 + \
(asprd/fsprd**2)*((fsprd/ensemble.members) + covinflate2*(2.*inc**2/(ensemble.members-1))))
else:
# relaxation to prior spread inflation
asprd = np.sqrt(asprd); fsprd = np.sqrt(fsprd)
inf_fact = 1.+covinflate1*(fsprd-asprd)/asprd
xprime *= inf_fact
# run forecast model.
ensemble.x = xmean + xprime
for n in range(nsteps):
ensemble.advance()
# print out time mean stats.
# error and spread are normalized by observation error.
if diverged:
print(method,len(fcsterr),corrl,covinflate1,covinflate2,oberrstdev,np.nan,np.nan,np.nan,np.nan,neig)
else:
ncount = len(fcstsprd)
fcsterr = np.array(fcsterr)
fcsterr1 = np.array(fcsterr1)
fcstsprd = np.array(fcstsprd)
analerr = np.array(analerr)
analsprd = np.array(analsprd)
fstdev = np.sqrt(fcstsprd.mean())
astdev = np.sqrt(analsprd.mean())
asprdmean = asprdmean/ncount
aerrmean = aerrmean/ncount
fsprdmean = fsprdmean/ncount
fsprdobmean = fsprdobmean/ncount
corrmean = corrmean/ncount
corrhmean = corrhmean/ncount
fstd = np.sqrt(fsprdmean)
fstdob = np.sqrt(fsprdobmean)
covmean = corrmean; corrmean = corrmean/(fstd*fstd[ndim//2])
covhmean = corrhmean; corrhmean = corrhmean/(fstd*fstdob[ndim//2])
fcsterrcorr =\
(fcsterr1.T*fcsterr1[:,ndim//2]).sum(axis=1)/float(fcsterr1.shape[0]-1)
ferrstd = np.sqrt((fcsterr1**2).sum(axis=0)/float(fcsterr1.shape[0]-1))
errcovmean = fcsterrcorr
fcsterrcorr = fcsterrcorr/(ferrstd*ferrstd[ndim//2])
#import matplotlib.pyplot as plt
#plt.plot(np.arange(ndim),corrmean,color='k',label='r')
#plt.plot(np.arange(ndim),corrhmean,color='b',label='r (x vs hx)')
#plt.plot(np.arange(ndim),h[:,ndim//2]/h.max(),color='r',label='H')
#plt.plot(np.arange(ndim),covlocal[:,ndim//2],'k:',label='L')
#plt.xlim(0,ndim)
#plt.legend()
#plt.figure()
#plt.plot(np.arange(ndim),covmean,color='b',label='mean ens cov')
#plt.plot(np.arange(ndim),errcovmean,color='r',label='ens mean errcov')
#plt.xlim(0,ndim)
#plt.legend()
#plt.show()
print(method,ncount,corrl,covinflate1,covinflate2,oberrstdev,np.sqrt(fcsterr.mean()),fstdev,\
np.sqrt(analerr.mean()),astdev,neig)