This repository has been archived by the owner on Jan 13, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjsd.py
72 lines (67 loc) · 2.99 KB
/
jsd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
import numpy as __np
from scipy.optimize import minimize as __minimize
# Estimation of the empirical cdf
def __EmpiricalCDF(bins,data):
empiricalHistogram=__np.histogram(data,bins=bins)[0]
empiricalCH=__np.cumsum(empiricalHistogram)
return empiricalCH/empiricalCH[-1]
# Maximum likelihood estimation of the parameters
def __MLE(distLike,distParams,data):
return __minimize(distLike,distParams,args=(data),
method="nelder-mead").x
# estimate JSD of a data given assumed distribution
def __JSD(data,empiricalDist,theorDist,returnParams=True):
# run MLE
distParams=__MLE(theorDist["likelihood"],theorDist["params"],data)
# setup empirical and theoretical (assumed) distribution
cdfBins=__np.linspace(empiricalDist["start"],
empiricalDist["stop"],
num=empiricalDist["bins"])
cdf1=__EmpiricalCDF(cdfBins,data)
cdf2=theorDist["cdf"](distParams,cdfBins[1:])
# estimate JSD
mcdf=0.5*(cdf1+cdf2)
with __np.errstate(divide="ignore",invalid="ignore"):
term1=cdf1*__np.log(cdf1/mcdf)
term2=cdf2*__np.log(cdf2/mcdf)
# 0*log(x)=0 (even if x is zero)
term1[cdf1==0]=0
term2[cdf2==0]=0
normalization=__np.sum(cdf1)+__np.sum(cdf2)
jsd=__np.sqrt(__np.sum(term1+term2)/(normalization*__np.log(2)))
if returnParams:
return jsd,distParams
return jsd
# estimate assumed distribution parameters, JSD score, JSD confidence intervals
def JSD(data,empiricalDist,theorDist,bootstrap):
# estimate JSD of the original data
jsdEstimate,distParams=__JSD(data,empiricalDist,theorDist)
# estimate confidence interval of the JSD using bootstrap methods
jsdConfidence=None
if bootstrap["iterations"]>0:
if bootstrap["blockSize"]<=1:
# ordinary bootstrap
tmpJSD=[]
for rep in range(bootstrap["iterations"]):
resample=__np.random.choice(data,size=len(data))
tmpJSD+=[__JSD(resample,empiricalDist,theorDist,returnParams=False),]
jsdConfidence=__np.percentile(tmpJSD,bootstrap["percentiles"])
else:
# moving block bootstrap
origLen=len(data)
data=__np.append(data,data[:bootstrap["blockSize"]-1])
getBlocks=origLen//bootstrap["blockSize"]+1
tmpJSD=[]
for rep in range(bootstrap["iterations"]):
selectedBlocks=__np.random.choice(range(origLen),size=getBlocks)
resample=[data[sb:sb+bootstrap["blockSize"]] for sb in selectedBlocks]
resample=resample[:origLen]
tmpJSD+=[__JSD(resample,empiricalDist,theorDist,returnParams=False),]
jsdConfidence=__np.percentile(tmpJSD,bootstrap["percentiles"])
pass
return {
"parameterEstimates": distParams,
"jsdEstimate": jsdEstimate,
"jsdConfidence": jsdConfidence,
}