-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmoran_vec.py
186 lines (144 loc) · 5.53 KB
/
moran_vec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from libpysal.weights.spatial_lag import lag_spatial as slag
import numpy as np
import random
from sklearn import preprocessing
import psutil
# Default number of random permutations for the pseudo p-value test
# (0 disables the permutation test entirely; see Moran.calc_i).
PERMUTATIONS = 0
def get_memory():
    """Return the amount of currently available system memory, in MB."""
    available_bytes = psutil.virtual_memory().available
    # Convert bytes -> MB (two divisions by 1024).
    return available_bytes / (1024.0 * 1024.0)
# NOTE(review): commented-out Linux-only alternative that reads /proc/meminfo
# directly; superseded by the psutil-based get_memory above. Kept for reference.
'''
def get_memory():
with open('/proc/meminfo', 'r') as mem:
free_memory = 0
for i in mem:
sline = i.split()
if str(sline[0]) in ('MemFree:', 'Buffers:', 'Cached:'):
free_memory += int(sline[1])
return free_memory / 1024
'''
class Moran(object):
    """
    Moran's I Global Autocorrelation Statistic, vectorized over features.

    Adapted from https://github.com/pysal/esda

    Parameters
    ----------
    n : int
        number of observations
    w : W
        spatial weights instance (libpysal W)
    transformation : string
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo-p_values

    Attributes
    ----------
    w : W
        original w object (transformed in place by __init__)
    permutations : int
        number of permutations
    I : array
        value of Moran's I, one entry per feature (set by calc_i)
    sim : array
        (if permutations>0) permutations x p matrix of I values for
        permuted samples
    p_sim : array
        (if permutations>0) two-sided pseudo p-value per feature:
        null is spatial randomness; the observed I is extreme if it is
        either extremely greater or extremely lower than the permuted values

    Notes
    -----
    Technical details and derivations can be found in :cite:`cliff81`.
    """

    def __init__(self, n, w, transformation="r", permutations=PERMUTATIONS):
        # Transform the weights once up front so every subsequent
        # calculation (observed and permuted) uses the same matrix.
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.n = n
        self.__moments()

    def calc_i(self, X, seed=1, max_RAM=16):
        """
        Calculate Moran's I for every feature (column) of X.

        Parameters
        ----------
        X : n x p array
            log-transformed feature matrix
        seed : int
            random seed to make the permutation results reproducible
        max_RAM : int, float
            maximum limitation of memory (Gb)

        Returns
        -------
        self : Moran
            the fitted estimator (fix: the original returned None despite
            documenting "the fitted estimator").
        """
        random.seed(seed)
        np.random.seed(seed)
        # Standardize each feature to zero mean / unit variance, then clip
        # extreme z-scores to [-10, 10] to limit the influence of outliers.
        self.Z = preprocessing.StandardScaler().fit_transform(X)
        np.clip(self.Z, -10.0, 10.0, out=self.Z)
        self.I = self.__calc(self.Z, max_RAM)
        if self.permutations:
            # Row-permute Z to break any spatial structure; each replicate
            # yields one I value per feature.
            sim = np.array([
                self.__calc(np.random.permutation(self.Z), max_RAM)
                for _ in range(self.permutations)
            ])
            self.sim = sim
            # Two-sided pseudo p-value: count replicates at least as large
            # as the observed I, then take the smaller tail.
            above = sim >= self.I[np.newaxis, :]
            larger = above.sum(axis=0)
            extreme = np.minimum(self.permutations - larger, larger)
            self.p_sim = (extreme + 1.0) / (self.permutations + 1.0)
        return self

    def __moments(self):
        # NOTE(review): these locals are computed and then discarded —
        # this looks like a truncated port of esda's full moment
        # calculation (EI, VI, z-scores). Kept as-is to preserve behavior
        # (the w.s1/s0/s2 property accesses may populate cached values
        # on the weights object).
        n = self.n
        n2 = n * n
        s1 = self.w.s1
        s0 = self.w.s0
        s2 = self.w.s2
        s02 = s0 * s0

    def __calc(self, Z, max_RAM):
        """
        Compute Moran's I for each column of Z.

        Estimated memory needed (MB): 0.000008 * (3*n*p + 3*p). When that
        exceeds 80% of the available budget, the computation falls back to
        processing the columns in chunks.
        """
        n_obs, n_feat = Z.shape
        needed_mb = 0.000008 * (3 * n_obs * n_feat + 3 * n_feat)
        # Budget is the lesser of actually-free memory and the user cap.
        budget_mb = min(get_memory(), max_RAM * 1024) * 0.8
        if budget_mb > needed_mb:
            # Whole-matrix path.
            z2ss = np.square(Z).sum(axis=0)
            zl = slag(self.w, Z)
            inum = (Z * zl).sum(axis=0)
            return self.n / self.w.s0 * (inum / z2ss)
        # Chunked path: split the p columns into nt pieces that each fit.
        out = np.zeros(n_feat)
        nt = int(needed_mb / budget_mb + 1)
        nf = n_feat // nt
        for i in range(nt - 1):
            sl = slice(nf * i, nf * (i + 1))
            # BUG FIX: the original read self.Z here instead of the Z
            # argument, so permutation replicates computed z2ss from the
            # unpermuted matrix. (Numerically harmless for row
            # permutations, since column sums of squares are
            # permutation-invariant, but wrong for any other caller.)
            z2ss = np.square(Z[:, sl]).sum(axis=0)
            zl = slag(self.w, Z[:, sl])
            inum = (Z[:, sl] * zl).sum(axis=0)
            out[sl] = self.n / self.w.s0 * (inum / z2ss)
        # Final chunk picks up the remainder columns.
        sl = slice(nf * (nt - 1), None)
        z2ss = np.square(Z[:, sl]).sum(axis=0)
        zl = slag(self.w, Z[:, sl])
        inum = (Z[:, sl] * zl).sum(axis=0)
        out[sl] = self.n / self.w.s0 * (inum / z2ss)
        return out