Skip to content

Commit

Permalink
Updates from review
Browse files Browse the repository at this point in the history
  • Loading branch information
arm61 committed Nov 23, 2023
1 parent e672236 commit a846199
Show file tree
Hide file tree
Showing 18 changed files with 711 additions and 186 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ We then perform Bayesian regression to sample the distribution of linear models
<a href="https://orcid.org/0000-0001-9722-5676">Samuel W. Coles</a>
and
<a href="https://orcid.org/0000-0002-3056-8233">Benjamin J. Morgan</a>&dagger;<br>
&ast;<a href="mailto:[email protected]">andrew.mccluskey@ess.eu</a>/&dagger;<a href="mailto:[email protected]">[email protected]</a>
&ast;<a href="mailto:[email protected]">andrew.mccluskey@bristol.ac.uk</a>/&dagger;<a href="mailto:[email protected]">[email protected]</a>
</p>

---
Expand Down
57 changes: 57 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,46 @@ rule rw_4096_kinisi_true_128:
script:
f'src/code/random_walks/kinisi_rw.py'

# Random walks simulations
rule rw_4096_pyblock_true_128:
input:
f'src/code/random_walks/kinisi_pyblock_rw.py',
'src/code/random_walks/random_walk.py'
output:
f'src/data/random_walks/pyblock/rw_1_128_128_s4096.npz'
conda:
'environment.yml'
cache:
True
params:
jump=2.4494897428,
atoms=128,
length=128,
correlation='true',
n=4096
script:
f'src/code/random_walks/kinisi_pyblock_rw.py'

# Random walks simulations
rule rw_4096_pyblock_modelfree_true_128:
input:
f'src/code/random_walks/kinisi_pyblock_modelfree_rw.py',
'src/code/random_walks/random_walk.py'
output:
f'src/data/random_walks/pyblock_modelfree/rw_1_128_128_s4096.npz'
conda:
'environment.yml'
cache:
True
params:
jump=2.4494897428,
atoms=128,
length=128,
correlation='true',
n=4096
script:
f'src/code/random_walks/kinisi_pyblock_modelfree_rw.py'

# Length variation random walks
for length in [16, 32, 64, 128, 256, 512, 1024]:
rule:
Expand Down Expand Up @@ -379,3 +419,20 @@ for start_diff in [0, 2, 4, 6, 8, 10, 15, 20]:
script:
"src/code/llzo/true_ls.py"

rule:
name:
f"glswlsols_D_llzo_{start_diff}"
input:
'src/code/llzo/glswlsols.py',
[f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 16, 1)]
output:
f'src/data/llzo/glswlsols_{start_diff}.npz'
conda:
'environment.yml'
cache:
True
params:
start_diff=start_diff
script:
"src/code/llzo/glswlsols.py"

8 changes: 4 additions & 4 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ channels:
dependencies:
- numpy=1.23.1
- pip=21.3.1
- python=3.9.7
- python=3.10
- scipy=1.9.3
- scikit-learn=1.1.3
- mdanalysis=2.3.0
- pip:
- git+https://github.com/matplotlib/matplotlib.git
- dynesty==1.0.1
- uravu==1.2.9
- kinisi==0.6.3
- pyblock==0.6
- statsmodels==0.14.0
- git+https://github.com/bjmorgan/kinisi.git@block
16 changes: 6 additions & 10 deletions showyourwork.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,9 @@ dependencies:
src/scripts/stat_eff.py:
- src/data/random_walks/stat_eff.npz
- src/scripts/utils/_fig_params.py
src/scripts/efficiency.py:
- src/data/random_walks/efficiency.npz
- src/scripts/utils/_fig_params.py
src/scripts/true_llzo.py:
{% for start_diff in [0, 2, 4, 6, 8, 10, 15, 20] %}
- src/data/llzo/true_{{start_diff}}.npz
{% for n in range(0, 16, 1) %}
- src/data/llzo/diffusion_{{n}}_{{start_diff}}.npz
{% endfor %}
{% endfor %}
src/scripts/glswlsols_llzo.py:
- src/data/llzo/glswlsols_10.npz
src/scripts/pyblock.py:
- src/data/random_walks/kinisi/rw_1_128_128_s4096.npz
- src/data/random_walks/pyblock/rw_1_128_128_s4096.npz
- src/data/random_walks/pyblock_modelfree/rw_1_128_128_s4096.npz
51 changes: 51 additions & 0 deletions src/code/llzo/glswlsols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey (arm61)

import numpy as np
from scipy.stats import linregress
from scipy.linalg import pinvh
from emcee import EnsembleSampler
from scipy.optimize import minimize
from kinisi.matrix import find_nearest_positive_definite
from kinisi.diffusion import _straight_line

np.random.seed(1)

start_diff = snakemake.params['start_diff']

timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
d = np.zeros((16, 8, 1, timestep.size))
for i in range(0, 16, 1):
d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']

max_ngp = np.argwhere(timestep > start_diff)[0][0]
true_msd = d.reshape(-1, d.shape[-1])[:, max_ngp:]
true_cov = np.cov(true_msd.T)
timestep = timestep[max_ngp:]

y = true_msd.T
A = np.array([timestep, np.ones(timestep.size)]).T
W = true_cov
V = np.diag(true_cov.diagonal())

g1 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A))),
np.matmul(A.T, np.matmul(np.linalg.pinv(W), y)))[0] / 6
c1 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A)))[0][0]) / 6
g2 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A))),
np.matmul(A.T, np.matmul(np.linalg.pinv(V), y)))[0] / 6
c2 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A)))[0][0]) / 6
g3 = np.matmul(np.linalg.inv(np.matmul(A.T, A)), np.matmul(A.T, y))[0] / 6

c3 = []
for i in true_msd:
c3.append(linregress(timestep, i).stderr / 6)

np.savez(f'src/data/llzo/glswlsols_{start_diff}.npz',
gls_pop=g1,
wls_pop=g2,
ols_pop=g3,
gls_est=c1,
wls_est=c2,
ols_est=np.mean(c3))
26 changes: 13 additions & 13 deletions src/code/llzo/many_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,25 @@ def universe_slice(u: mda.Universe, data_file: str, slice_start: int, slice_end:


u = mda.Universe(file, file)
uu = universe_slice(u, file, 0, 500)
uu = universe_slice(u, file, 0, 2000)
step_skip = 100 # sampling rate
timestep = 5.079648e-4 # ps
da_params = {'specie': 'Li', 'time_step': timestep, 'step_skip': step_skip}
p = MDAnalysisParser(uu, **da_params)

ngp = np.zeros((8, 4, p.delta_t.size))
no = np.zeros((8, 4, p.delta_t.size))
dt = np.zeros((8, 4, p.delta_t.size))
msd = np.zeros((8, 4, p.delta_t.size))
msd_true = np.zeros((8, 4, p.delta_t.size))
msd_std = np.zeros((8, 4, p.delta_t.size))
cov = np.zeros((8, 4, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
d = np.zeros((8, 4, 3200))
intercept = np.zeros((8, 4, 3200))
ngp = np.zeros((8, 1, p.delta_t.size))
no = np.zeros((8, 1, p.delta_t.size))
dt = np.zeros((8, 1, p.delta_t.size))
msd = np.zeros((8, 1, p.delta_t.size))
msd_true = np.zeros((8, 1, p.delta_t.size))
msd_std = np.zeros((8, 1, p.delta_t.size))
cov = np.zeros((8, 1, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
d = np.zeros((8, 1, 3200))
intercept = np.zeros((8, 1, 3200))

for m in range(0, 8, 1):
for i, slice in enumerate(range(0, 2000, 500)):
uu = universe_slice(u, file, slice, slice+500)
for i, slice in enumerate(range(0, 2000, 2000)):
uu = universe_slice(u, file, slice, slice+2000)

rng = np.random.RandomState(42)
np.random.seed(42)
Expand All @@ -76,7 +76,7 @@ def universe_slice(u: mda.Universe, data_file: str, slice_start: int, slice_end:
msd_true[m, i, t] = np.sum(j[m::8, ::p.timesteps[t]]**2, axis=-1).mean()

b = MSDBootstrap(p.delta_t, n_disp_3d, p._n_o / 8)
b.diffusion(**{'dt_skip': start_diff, 'random_state': rng})
b.diffusion(start_diff, random_state=rng)
ngp[m, i] = b.ngp
no[m, i] = p._n_o / 8
dt[m, i] = b.dt
Expand Down
2 changes: 1 addition & 1 deletion src/code/llzo/true_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
d = np.zeros((16, 8, 4, timestep.size))
d = np.zeros((16, 8, 1, timestep.size))
for i in range(0, 16, 1):
d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']

Expand Down
46 changes: 46 additions & 0 deletions src/code/random_walks/kinisi_pyblock_modelfree_rw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey (arm61)

import numpy as np
from kinisi.diffusion import MSDBootstrap
from tqdm import tqdm
from random_walk import get_disp3d, walk

jump = snakemake.params['jump']
atoms = snakemake.params['atoms']
length = int(snakemake.params['length'])
size = int(snakemake.params['n'])

timestep = np.arange(1, length + 1, 1, dtype=int)
data = np.zeros((size, timestep.size, 4))
covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
npd_covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
n_o = np.zeros((size, timestep.size))
diff_c = np.zeros((size, 3200))
intercept = np.zeros((size, 3200))

for seed in tqdm(range(size)):
rng = np.random.RandomState(seed)
np.random.seed(seed)
disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)

diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False, block=True)
diff.diffusion(5, model=False, random_state=rng, progress=False)
diff_c[seed] = diff.gradient.samples / 6
intercept[seed] = diff.intercept.samples
data[seed, :, 0] = diff.dt
data[seed, :, 1] = diff.n
data[seed, :, 2] = diff.s
data[seed, 4:, 3] = diff._model_v
npd_covariance[seed] = diff._npd_covariance_matrix
covariance[seed] = diff.covariance_matrix
n_o[seed] = diff._n_o

np.savez(f'src/data/random_walks/pyblock_modelfree/rw_1_{atoms}_{length}_s{size}.npz',
diff_c=diff_c,
intercept=intercept,
data=data,
covariance=covariance,
npd_covariance=npd_covariance,
n_o=n_o)
46 changes: 46 additions & 0 deletions src/code/random_walks/kinisi_pyblock_rw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey (arm61)

import numpy as np
from kinisi.diffusion import MSDBootstrap
from tqdm import tqdm
from random_walk import get_disp3d, walk

jump = snakemake.params['jump']
atoms = snakemake.params['atoms']
length = int(snakemake.params['length'])
size = int(snakemake.params['n'])

timestep = np.arange(1, length + 1, 1, dtype=int)
data = np.zeros((size, timestep.size, 4))
covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
npd_covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
n_o = np.zeros((size, timestep.size))
diff_c = np.zeros((size, 3200))
intercept = np.zeros((size, 3200))

for seed in tqdm(range(size)):
rng = np.random.RandomState(seed)
np.random.seed(seed)
disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)

diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False, block=True)
diff.diffusion(5, random_state=rng, progress=False)
diff_c[seed] = diff.gradient.samples / 6
intercept[seed] = diff.intercept.samples
data[seed, :, 0] = diff.dt
data[seed, :, 1] = diff.n
data[seed, :, 2] = diff.s
data[seed, 4:, 3] = diff._model_v
npd_covariance[seed] = diff._npd_covariance_matrix
covariance[seed] = diff.covariance_matrix
n_o[seed] = diff._n_o

np.savez(f'src/data/random_walks/pyblock/rw_1_{atoms}_{length}_s{size}.npz',
diff_c=diff_c,
intercept=intercept,
data=data,
covariance=covariance,
npd_covariance=npd_covariance,
n_o=n_o)
2 changes: 1 addition & 1 deletion src/code/random_walks/kinisi_rw.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)

diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False)
diff.diffusion(use_ngp=False, random_state=rng, dt_skip=4, progress=False)
diff.diffusion(5, random_state=rng, progress=False)
diff_c[seed] = diff.gradient.samples / 6
intercept[seed] = diff.intercept.samples
data[seed, :, 0] = diff.dt
Expand Down
2 changes: 1 addition & 1 deletion src/scripts/diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
type = 'kinisi'

dllzo_true = np.load(paths.data / "llzo/true_10.npz")['diff_c']
d = np.zeros((16, 8, 4, 3200))
d = np.zeros((16, 8, 1, 3200))
for i in range(0, 16, 1):
d[i] = np.load(paths.data / f'llzo/diffusion_{i}_10.npz')['d']
d = d.reshape(-1, 3200)
Expand Down
Loading

0 comments on commit a846199

Please sign in to comment.