Updates from review

arm61 · Nov 23, 2023 · a846199 · a846199
1 parent e672236
commit a846199
Show file tree

Hide file tree

Showing 18 changed files with 711 additions and 186 deletions.
diff --git a/README.md b/README.md
@@ -38,7 +38,7 @@ We then perform Bayesian regression to sample the distribution of linear models
 <a href="https://orcid.org/0000-0001-9722-5676">Samuel W. Coles</a> 
 and 
 <a href="https://orcid.org/0000-0002-3056-8233">Benjamin J. Morgan</a>&dagger;<br>
-&ast;<a href="mailto:[email protected]">andrew.mccluskey@ess.eu</a>/&dagger;<a href="mailto:[email protected]">[email protected]</a>
+&ast;<a href="mailto:[email protected]">andrew.mccluskey@bristol.ac.uk</a>/&dagger;<a href="mailto:[email protected]">[email protected]</a>
 </p>
 
 ---

diff --git a/Snakefile b/Snakefile
@@ -96,6 +96,46 @@ rule rw_4096_kinisi_true_128:
     script:
         f'src/code/random_walks/kinisi_rw.py'
 
+# Random walks simulations
+rule rw_4096_pyblock_true_128:
+    input:
+        f'src/code/random_walks/kinisi_pyblock_rw.py',
+        'src/code/random_walks/random_walk.py'
+    output:
+        f'src/data/random_walks/pyblock/rw_1_128_128_s4096.npz'
+    conda:
+        'environment.yml'
+    cache:
+        True
+    params:
+        jump=2.4494897428,
+        atoms=128,
+        length=128,
+        correlation='true',
+        n=4096
+    script:
+        f'src/code/random_walks/kinisi_pyblock_rw.py'
+
+# Random walks simulations
+rule rw_4096_pyblock_modelfree_true_128:
+    input:
+        f'src/code/random_walks/kinisi_pyblock_modelfree_rw.py',
+        'src/code/random_walks/random_walk.py'
+    output:
+        f'src/data/random_walks/pyblock_modelfree/rw_1_128_128_s4096.npz'
+    conda:
+        'environment.yml'
+    cache:
+        True
+    params:
+        jump=2.4494897428,
+        atoms=128,
+        length=128,
+        correlation='true',
+        n=4096
+    script:
+        f'src/code/random_walks/kinisi_pyblock_modelfree_rw.py'
+
 # Length variation random walks 
 for length in [16, 32, 64, 128, 256, 512, 1024]:
     rule:
@@ -379,3 +419,20 @@ for start_diff in [0, 2, 4, 6, 8, 10, 15, 20]:
         script:
             "src/code/llzo/true_ls.py"
 
+    rule:
+        name:
+            f"glswlsols_D_llzo_{start_diff}"
+        input:
+            'src/code/llzo/glswlsols.py',
+            [f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 16, 1)]
+        output:
+            f'src/data/llzo/glswlsols_{start_diff}.npz'
+        conda:
+            'environment.yml'
+        cache:
+            True
+        params:
+            start_diff=start_diff
+        script:
+            "src/code/llzo/glswlsols.py"
+
diff --git a/environment.yml b/environment.yml
@@ -4,12 +4,12 @@ channels:
 dependencies:
   - numpy=1.23.1
   - pip=21.3.1
-  - python=3.9.7
+  - python=3.10
   - scipy=1.9.3
   - scikit-learn=1.1.3
   - mdanalysis=2.3.0
   - pip:
       - git+https://github.com/matplotlib/matplotlib.git
-      - dynesty==1.0.1
-      - uravu==1.2.9
-      - kinisi==0.6.3
+      - pyblock==0.6
+      - statsmodels==0.14.0
+      - git+https://github.com/bjmorgan/kinisi.git@block
diff --git a/showyourwork.yml b/showyourwork.yml
@@ -61,13 +61,9 @@ dependencies:
     src/scripts/stat_eff.py:
         - src/data/random_walks/stat_eff.npz
         - src/scripts/utils/_fig_params.py
-    src/scripts/efficiency.py:
-        - src/data/random_walks/efficiency.npz
-        - src/scripts/utils/_fig_params.py
-    src/scripts/true_llzo.py:
-        {% for start_diff in [0, 2, 4, 6, 8, 10, 15, 20] %}
-        - src/data/llzo/true_{{start_diff}}.npz
-        {% for n in range(0, 16, 1) %}
-        - src/data/llzo/diffusion_{{n}}_{{start_diff}}.npz
-        {% endfor %} 
-        {% endfor %}
+    src/scripts/glswlsols_llzo.py:
+        - src/data/llzo/glswlsols_10.npz
+    src/scripts/pyblock.py:
+        - src/data/random_walks/kinisi/rw_1_128_128_s4096.npz
+        - src/data/random_walks/pyblock/rw_1_128_128_s4096.npz
+        - src/data/random_walks/pyblock_modelfree/rw_1_128_128_s4096.npz
diff --git a/src/code/llzo/glswlsols.py b/src/code/llzo/glswlsols.py
@@ -0,0 +1,51 @@
+# Copyright (c) Andrew R. McCluskey
+# Distributed under the terms of the MIT License
+# author: Andrew R. McCluskey (arm61)
+
+import numpy as np
+from scipy.stats import linregress
+from scipy.linalg import pinvh
+from emcee import EnsembleSampler
+from scipy.optimize import minimize
+from kinisi.matrix import find_nearest_positive_definite
+from kinisi.diffusion import _straight_line
+
+np.random.seed(1)
+
+start_diff = snakemake.params['start_diff']
+
+timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
+no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
+d = np.zeros((16, 8, 1, timestep.size))
+for i in range(0, 16, 1):
+    d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']
+
+max_ngp = np.argwhere(timestep > start_diff)[0][0]
+true_msd = d.reshape(-1, d.shape[-1])[:, max_ngp:]
+true_cov = np.cov(true_msd.T)
+timestep = timestep[max_ngp:] 
+
+y = true_msd.T
+A = np.array([timestep, np.ones(timestep.size)]).T
+W = true_cov
+V = np.diag(true_cov.diagonal())
+
+g1 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A))),
+               np.matmul(A.T, np.matmul(np.linalg.pinv(W), y)))[0] / 6
+c1 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A)))[0][0]) / 6
+g2 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A))),
+               np.matmul(A.T, np.matmul(np.linalg.pinv(V), y)))[0] / 6
+c2 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A)))[0][0]) / 6
+g3 = np.matmul(np.linalg.inv(np.matmul(A.T, A)), np.matmul(A.T, y))[0] / 6
+
+c3 = []
+for i in true_msd:
+    c3.append(linregress(timestep, i).stderr / 6)
+
+np.savez(f'src/data/llzo/glswlsols_{start_diff}.npz',
+         gls_pop=g1,
+         wls_pop=g2,
+         ols_pop=g3,
+         gls_est=c1,
+         wls_est=c2,
+         ols_est=np.mean(c3))
diff --git a/src/code/llzo/many_runs.py b/src/code/llzo/many_runs.py
@@ -41,25 +41,25 @@ def universe_slice(u: mda.Universe, data_file: str, slice_start: int, slice_end:
 
 
 u = mda.Universe(file, file)
-uu = universe_slice(u, file, 0, 500)
+uu = universe_slice(u, file, 0, 2000)
 step_skip = 100  # sampling rate
 timestep = 5.079648e-4  # ps
 da_params = {'specie': 'Li', 'time_step': timestep, 'step_skip': step_skip}
 p = MDAnalysisParser(uu, **da_params)
 
-ngp = np.zeros((8, 4, p.delta_t.size))
-no = np.zeros((8, 4, p.delta_t.size))
-dt = np.zeros((8, 4, p.delta_t.size))
-msd = np.zeros((8, 4, p.delta_t.size))
-msd_true = np.zeros((8, 4, p.delta_t.size))
-msd_std = np.zeros((8, 4, p.delta_t.size))
-cov = np.zeros((8, 4, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
-d = np.zeros((8, 4, 3200))
-intercept = np.zeros((8, 4, 3200))
+ngp = np.zeros((8, 1, p.delta_t.size))
+no = np.zeros((8, 1, p.delta_t.size))
+dt = np.zeros((8, 1, p.delta_t.size))
+msd = np.zeros((8, 1, p.delta_t.size))
+msd_true = np.zeros((8, 1, p.delta_t.size))
+msd_std = np.zeros((8, 1, p.delta_t.size))
+cov = np.zeros((8, 1, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
+d = np.zeros((8, 1, 3200))
+intercept = np.zeros((8, 1, 3200))
 
 for m in range(0, 8, 1):
-    for i, slice in enumerate(range(0, 2000, 500)):
-        uu = universe_slice(u, file, slice, slice+500)
+    for i, slice in enumerate(range(0, 2000, 2000)):
+        uu = universe_slice(u, file, slice, slice+2000)
 
         rng = np.random.RandomState(42)
         np.random.seed(42)
@@ -76,7 +76,7 @@ def universe_slice(u: mda.Universe, data_file: str, slice_start: int, slice_end:
                 msd_true[m, i, t] = np.sum(j[m::8, ::p.timesteps[t]]**2, axis=-1).mean()
 
         b = MSDBootstrap(p.delta_t, n_disp_3d, p._n_o / 8)
-        b.diffusion(**{'dt_skip': start_diff, 'random_state': rng})
+        b.diffusion(start_diff, random_state=rng)
         ngp[m, i] = b.ngp
         no[m, i] = p._n_o / 8
         dt[m, i] = b.dt

diff --git a/src/code/llzo/true_ls.py b/src/code/llzo/true_ls.py
@@ -15,7 +15,7 @@
 
 timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
 no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
-d = np.zeros((16, 8, 4, timestep.size))
+d = np.zeros((16, 8, 1, timestep.size))
 for i in range(0, 16, 1):
     d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']
 

diff --git a/src/code/random_walks/kinisi_pyblock_modelfree_rw.py b/src/code/random_walks/kinisi_pyblock_modelfree_rw.py
@@ -0,0 +1,46 @@
+# Copyright (c) Andrew R. McCluskey
+# Distributed under the terms of the MIT License
+# author: Andrew R. McCluskey (arm61)
+
+import numpy as np
+from kinisi.diffusion import MSDBootstrap
+from tqdm import tqdm
+from random_walk import get_disp3d, walk
+
+jump = snakemake.params['jump']
+atoms = snakemake.params['atoms']
+length = int(snakemake.params['length'])
+size = int(snakemake.params['n'])
+
+timestep = np.arange(1, length + 1, 1, dtype=int)
+data = np.zeros((size, timestep.size, 4))
+covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
+npd_covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
+n_o = np.zeros((size, timestep.size))
+diff_c = np.zeros((size, 3200))
+intercept = np.zeros((size, 3200))
+
+for seed in tqdm(range(size)):
+    rng = np.random.RandomState(seed)
+    np.random.seed(seed)
+    disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)
+
+    diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False, block=True)
+    diff.diffusion(5, model=False, random_state=rng, progress=False)
+    diff_c[seed] = diff.gradient.samples / 6
+    intercept[seed] = diff.intercept.samples
+    data[seed, :, 0] = diff.dt
+    data[seed, :, 1] = diff.n
+    data[seed, :, 2] = diff.s
+    data[seed, 4:, 3] = diff._model_v
+    npd_covariance[seed] = diff._npd_covariance_matrix
+    covariance[seed] = diff.covariance_matrix
+    n_o[seed] = diff._n_o
+
+np.savez(f'src/data/random_walks/pyblock_modelfree/rw_1_{atoms}_{length}_s{size}.npz',
+         diff_c=diff_c,
+         intercept=intercept,
+         data=data,
+         covariance=covariance,
+         npd_covariance=npd_covariance,
+         n_o=n_o)
diff --git a/src/code/random_walks/kinisi_pyblock_rw.py b/src/code/random_walks/kinisi_pyblock_rw.py
@@ -0,0 +1,46 @@
+# Copyright (c) Andrew R. McCluskey
+# Distributed under the terms of the MIT License
+# author: Andrew R. McCluskey (arm61)
+
+import numpy as np
+from kinisi.diffusion import MSDBootstrap
+from tqdm import tqdm
+from random_walk import get_disp3d, walk
+
+jump = snakemake.params['jump']
+atoms = snakemake.params['atoms']
+length = int(snakemake.params['length'])
+size = int(snakemake.params['n'])
+
+timestep = np.arange(1, length + 1, 1, dtype=int)
+data = np.zeros((size, timestep.size, 4))
+covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
+npd_covariance = np.zeros((size, timestep.size - 4, timestep.size - 4))
+n_o = np.zeros((size, timestep.size))
+diff_c = np.zeros((size, 3200))
+intercept = np.zeros((size, 3200))
+
+for seed in tqdm(range(size)):
+    rng = np.random.RandomState(seed)
+    np.random.seed(seed)
+    disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)
+
+    diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False, block=True)
+    diff.diffusion(5, random_state=rng, progress=False)
+    diff_c[seed] = diff.gradient.samples / 6
+    intercept[seed] = diff.intercept.samples
+    data[seed, :, 0] = diff.dt
+    data[seed, :, 1] = diff.n
+    data[seed, :, 2] = diff.s
+    data[seed, 4:, 3] = diff._model_v
+    npd_covariance[seed] = diff._npd_covariance_matrix
+    covariance[seed] = diff.covariance_matrix
+    n_o[seed] = diff._n_o
+
+np.savez(f'src/data/random_walks/pyblock/rw_1_{atoms}_{length}_s{size}.npz',
+         diff_c=diff_c,
+         intercept=intercept,
+         data=data,
+         covariance=covariance,
+         npd_covariance=npd_covariance,
+         n_o=n_o)
diff --git a/src/code/random_walks/kinisi_rw.py b/src/code/random_walks/kinisi_rw.py
@@ -26,7 +26,7 @@
     disp_3d, n_samples = get_disp3d(walk, length, atoms, jump_size=jump, seed=rng)
 
     diff = MSDBootstrap(timestep, disp_3d, n_samples, random_state=rng, progress=False)
-    diff.diffusion(use_ngp=False, random_state=rng, dt_skip=4, progress=False)
+    diff.diffusion(5, random_state=rng, progress=False)
     diff_c[seed] = diff.gradient.samples / 6
     intercept[seed] = diff.intercept.samples
     data[seed, :, 0] = diff.dt

diff --git a/src/scripts/diffusion.py b/src/scripts/diffusion.py
@@ -13,7 +13,7 @@
 type = 'kinisi'
 
 dllzo_true = np.load(paths.data / "llzo/true_10.npz")['diff_c']
-d = np.zeros((16, 8, 4, 3200))
+d = np.zeros((16, 8, 1, 3200))
 for i in range(0, 16, 1):
     d[i] = np.load(paths.data / f'llzo/diffusion_{i}_10.npz')['d']
 d = d.reshape(-1, 3200)