From 69de3c44230e4cc717fc3e823d35715355fde7d8 Mon Sep 17 00:00:00 2001 From: David Fulford Date: Thu, 31 Oct 2019 14:46:34 -0500 Subject: [PATCH 01/10] accelerate ordinary kriging with numba --- geostatspy/geostats.py | 1446 ++++++++++++++++++++++------------------ 1 file changed, 811 insertions(+), 635 deletions(-) diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 3536e42..09c2242 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -21,10 +21,12 @@ import numpy as np # for ndarrays import numpy.linalg as linalg # for linear algebra import scipy.spatial as sp # for fast nearest neighbor search +import numba as nb # for numerical speed up from numba import jit # for numerical speed up +from numba.typed import Dict # typing of dictionaries from statsmodels.stats.weightstats import DescrStatsW -def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): """Back transform an entire DataFrame column with a provided transformation table and tail extrapolation. :param df: the source DataFrame :param vcol: the column with the variable to transfrom @@ -37,8 +39,8 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utail: upper tail value :param utpar: upper tail extrapolation parameter :return: TODO - """ - + """ + EPSLON=1.0e-20 nd = len(df); nt = len(vr) # number of data to transform and number of data in table backtr = np.zeros(nd) @@ -61,7 +63,7 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): cdfbt = gcum(vrgs[id]) if utail == 1: backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) - elif utail == 2: + elif utail == 2: cpow = 1.0 / utpar backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) elif utail == 4: @@ -74,7 +76,7 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): backtr[id] = powint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs[id],1.0) return backtr -def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): """Back transform a single value with a provided transformation table and tail extrapolation. :param vrgs: value to transform :param vr: the transformation table, 1D ndarray with the original values @@ -86,9 +88,9 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utail: upper tail value :param utpar: upper tail extrapolation parameter :return: TODO - """ + """ EPSLON=1.0e-20 - nt = len(vr) # number of data to transform + nt = len(vr) # number of data to transform # Value in the lower tail? 1=linear, 2=power, (3 and 4 are invalid): if vrgs <= vrg[0]: backtr = vr[0] @@ -106,7 +108,7 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): cdfbt = gcum(vrgs) if utail == 1: backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) - elif utail == 2: + elif utail == 2: cpow = 1.0 / utpar backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) elif utail == 4: @@ -121,18 +123,18 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): def gcum(x): """Calculate the cumulative probability of the standard normal distribution. - :param x: the value from the standard normal distribution + :param x: the value from the standard normal distribution :return: TODO - """ - + """ + z = x - if z < 0: + if z < 0: z = -z t = 1./(1.+ 0.2316419*z) gcum = t*(0.31938153 + t*(-0.356563782 + t*(1.781477937 + t*(-1.821255978 + t*1.330274429)))) e2 = 0. 
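# (The polynomial above is the Abramowitz & Stegun approximation of the
# standard normal CDF, accurate to roughly 1e-7, hence the 6-sigma cutoff
# handled next.)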
# 6 standard deviations out gets treated as infinity: - if z <= 6.: + if z <= 6.: e2 = np.exp(-z*z/2.)*0.3989422803 gcum = 1.0- e2 * gcum if x >= 0: @@ -150,10 +152,10 @@ def locate(xx,iis,iie,x): :param x: given value :return: TODO """ - + n = len(xx) # Initialize lower and upper methods: - if iis <= 0: + if iis <= 0: iis = 0 if iie >= n: iie = n-1 @@ -163,7 +165,7 @@ def locate(xx,iis,iie,x): j = iie return j # If we are not done then compute a midpoint: - while (ju-jl) > 1: + while (ju-jl) > 1: jm = int((ju+jl)/2) # Replace the lower or upper limit with the midpoint: if (xx[iie] > xx[iis]) == (x > xx[jm]): @@ -193,7 +195,7 @@ def dlocate(xx, iis, iie, x): return j def powint(xlow,xhigh,ylow,yhigh,xval,power): - """Power-based interpolator + """Power-based interpolator :param xlow: x lower interval :param xhigh: x upper interval :param ylow: y lower interval @@ -201,7 +203,7 @@ def powint(xlow,xhigh,ylow,yhigh,xval,power): :param xval: value on x :param power: power for interpolation :return: TODO - """ + """ EPSLON=1.0e-20 if (xhigh-xlow) < EPSLON: powint = (yhigh+ylow)/2.0 @@ -486,18 +488,18 @@ def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): :param y1: y coordinate of first point :param x2: x coordinate of second point :param y2: y coordinate of second point - :param ist: structure index - :param rotmat: 2d rotation matrix + :param ist: structure index + :param rotmat: 2d rotation matrix :param anis: 2D anisotropy ratio :return: TODO - """ - + """ + # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 dx1 = (dx*rotmat[0,ist] + dy*rotmat[1,ist]) dy1 = (dx*rotmat[2,ist] + dy*rotmat[3,ist])/anis[ist] - sqdist_ = (dx1*dx1+dy1*dy1) + sqdist_ = (dx1*dx1+dy1*dy1) return sqdist_ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): @@ -521,7 +523,7 @@ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): rotmat[0,1,2] = sina rotmat[0,2,1] = afac1*(-sina) rotmat[0,2,2] = afac1*(cosa) -# 2nd structure if present +# 2nd structure if present if nst > 1: if ang2 >= 0.0 and ang2 < 270.0: alpha = (90.0 - ang2) * DEG2RAD @@ -553,6 +555,7 @@ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): # Return to calling program: return rotmat +@jit(nopython=True) def ksol_numpy(neq, a, r): """Find solution of a system of linear equations. 
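    Jitted with numba's nopython mode; solves the trimmed neq x neq system by
    an explicit matrix inverse followed by a matrix-vector product.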
:param neq: number of equations @@ -562,9 +565,10 @@ def ksol_numpy(neq, a, r): """ a = a[0: neq * neq] # trim the array a = np.reshape(a, (neq, neq)) # reshape to 2D - ainv = linalg.inv(a) # invert matrix + ainv = linalg.inv(a).copy() # invert matrix r = r[0: neq] # trim the array - s = np.matmul(ainv, r) # matrix multiplication + # s = np.matmul(ainv, r) # matrix multiplication + s = ainv @ r # matrix multiplication return s def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,global_rotmat,radsqd): @@ -580,16 +584,16 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Size of the look-up table: tmp = np.zeros(MAXXYZ) MAXORD = MAXXYZ - if (nx*ny) < MAXCXY: + if (nx*ny) < MAXCXY: MAXORD = MAXCXY - + order = np.zeros(MAXORD) nctx = int(min(((MAXCTX-1)/2),(nx-1))) ncty = int(min(((MAXCTY-1)/2),(ny-1))) - + # print('CTable check') # print('nctx ' + str(nctx) + ', ncty ' + str(ncty)) - + ixnode = np.zeros(MAXXYZ) iynode = np.zeros(MAXXYZ) covtab = np.zeros((MAXCTX,MAXCTY)) @@ -600,41 +604,41 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Now, set up the table and keep track of the node offsets that are # within the search radius: nlooku = -1 # adjusted for 0 origin - for i in range(-nctx,nctx+1): # cover entire range + for i in range(-nctx,nctx+1): # cover entire range xx = i * xsiz ic = nctx + i - for j in range(-ncty,ncty+1): # cover entire range + for j in range(-ncty,ncty+1): # cover entire range yy = j * ysiz jc = ncty + j covtab[ic,jc] = cova2(0.0,0.0,xx,yy,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) # print('cov table offset'); print(xx,yy); print(covtab[ic,jc]) hsqd = sqdist(0.0,0.0,0.0,xx,yy,0.0,MAXROT,global_rotmat) - if hsqd <= radsqd: + if hsqd <= radsqd: nlooku = nlooku + 1 # We want to search by closest variogram distance (and use the # anisotropic Euclidean distance to break ties: tmp[nlooku] = - (covtab[ic,jc] - TINY*hsqd) order[nlooku] = (jc)*MAXCTX+ic -# print('populated presort'); print(tmp,order) +# print('populated presort'); print(tmp,order) # Finished setting up the look-up table, now order the nodes such # that the closest ones, according to variogram distance, are searched # first. Note: the "loc" array is used because I didn't want to make # special allowance for 2 byte integers in the sorting subroutine: - + nlooku = nlooku + 1 # print('nlooku' + str(nlooku)); print('MAXCTX' + str(MAXCTX)) tmp, order = dsortem(0,nlooku,tmp,2,b=order) # print('populated postsort'); print(tmp,order) - for il in range(0,nlooku): + for il in range(0,nlooku): loc = int(order[il]) iy = int((loc-0)/MAXCTX) ix = loc - (iy-0)*MAXCTX iynode[il] = int(iy) ixnode[il] = int(ix) # print('populated ix, iy node list'); print(ixnode, iynode) - + return covtab,tmp,order,ixnode,iynode,nlooku,nctx,ncty def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nctx,ncty,UNEST): @@ -643,29 +647,29 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc Austin (March, 2019). Note this was simplified to 2D only. 
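    Nodes are visited in the covariance-ordered spiral (ixnode/iynode built by
    ctable), so the closest nodes in variogram distance are found first.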
""" - + # Consider all the nearby nodes until enough have been found: - ncnode = 0; - icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) + ncnode = 0; + icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) cnodev = np.zeros(nodmax);cnodex = np.zeros(nodmax); cnodey = np.zeros(nodmax); - + # print('Node search at '); print(ix,iy) # print('nlooku'); print(nlooku) if noct > 0: ninoct = np.zeros(8) - for il in range(0,nlooku): + for il in range(0,nlooku): if ncnode == nodmax: return ncnode, icnode, cnodev, cnodex, cnodey i = ix + (int(ixnode[il])-nctx) j = iy + (int(iynode[il])-ncty) # print('i,j'); print(i,j) if i < 0 or j < 0: continue if i >= nx or j >= ny: continue - ind = i + (j)*nx + ind = i + (j)*nx if sim[ind] > UNEST: icnode[ncnode] = il cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin cnodey[ncnode] = ymn + (j)*ysiz - cnodev[ncnode] = sim[ind] + cnodev[ncnode] = sim[ind] # print('srchnd found at index - ' +str(ind) + ' at x and y ' + str(cnodex[ncnode]) + ',' + str(cnodey[ncnode])) # print(' ix = ' + str(i) + ' and iy = ' + str(j)) # print(' value = ' + str(sim[ind])) @@ -681,12 +685,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar EPSLON = 1.0e-20; UNEST=-1.0 # Check for both "zval" and "cdfval" defined or undefined: - ierr = 1; - if zval > UNEST and cdfva > UNEST: + ierr = 1; + if zval > UNEST and cdfva > UNEST: return -1 - if zval <= UNEST and cdfval <= UNEST: + if zval <= UNEST and cdfval <= UNEST: return - 1 - + # Handle the case of a categorical variable: if ivtype == 0: cum = 0 @@ -696,14 +700,14 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar zval = ccut[i] return zval return zval - + # Figure out what part of distribution: ipart = 0 - lower tail # ipart = 1 - middle # ipart = 2 - upper tail ierr = 0 ipart = 1 if zva > UNEST: - if zval <= ccut[0]: + if zval <= ccut[0]: ipart = 0 if zval >= ccut[nccut-1]: ipart = 2 @@ -712,10 +716,10 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar ipart = 0 if cdfval >= ccdf[nccut-1]: ipart = 2 - + # ARE WE IN THE LOWER TAIL? - if ipart == 0: + if ipart == 0: if ltail ==1: # Straight Linear Interpolation: powr = 1.0 @@ -726,12 +730,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar elif ltail == 2: # Power Model interpolation to lower limit "zmin"? - if zval > UNEST: + if zval > UNEST: cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,ltpar) else: powr = 1.0 / ltpar zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) - + # Linear interpolation between the rescaled global cdf? elif ltail == 3: if zval > UNEST: @@ -767,12 +771,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Error situation - unacceptable option: ierr = 2 return -1 - + # FINISHED THE LOWER TAIL, ARE WE IN THE MIDDLE? if ipart == 1: # Establish the lower and upper limits: - if zval > UNEST: + if zval > UNEST: cclow = locate(ccut,1,nccut,zval) else: cclow = locate(ccdf,1,nccut,cdfval) @@ -785,7 +789,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,powr) else: zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) - + # Power interpolation between class bounds? 
elif middle == 2: if zval > UNEST: @@ -793,14 +797,14 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: powr = 1.0 / mpar zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) - + # Linear interpolation between the rescaled global cdf? elif middle == 3: ilow = locate(cut,ncut,1,ncut,ccut[cclow]) iupp = locate(cut,ncut,1,ncut,ccut[cchigh]) - if cut[ilow] < ccut[cclow]: + if cut[ilow] < ccut[cclow]: ilow = ilow + 1 - if cut[iupp] > ccut[cchigh]: + if cut[iupp] > ccut[cchigh]: iupp = iupp - 1 if zval > UNEST: idat = locate(cut,1,ncut,zval) @@ -821,7 +825,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: temp=powint(ccdf[cclow],ccdf[cchigh],cdf[ilow],cdf[iupp],cdfval,1.) idat = locate(cdf,1,ncut,temp) - if cut[idat] < ccut[cclow]: + if cut[idat] < ccut[cclow]: idat=idat+1 if idat <= -1 or idat >= ncut-1 or cut[idat+1] > ccut[cchigh]: zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.) @@ -836,13 +840,13 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar return -1 # FINISHED THE MIDDLE, ARE WE IN THE UPPER TAIL? - if ipart == 2: - if utail == 1: + if ipart == 2: + if utail == 1: powr = 1.0 if zval > UNEST: cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,powr) else: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) elif utail == 2: # Power interpolation to upper limit "utpar"? @@ -861,7 +865,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar ilow = locate(cut,1,ncut,ccut(nccut),ilow) if cut[idat] < zval: idat = idat + 1 - if cut[ilow] < ccut[nccut-1]: + if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear @@ -876,7 +880,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Computing Z value: Are there any data out in the tail? ilow = locate(cut,ncut,1,ncut,ccut(nccut),ilow) - if cut[ilow] < ccut[nccut-1]: + if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear @@ -886,7 +890,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: temp = powint(ccdf(nccut),1.0,cdf(ilow),1.0,cdfval,1.) idat = locate(cdf,ncut,1,ncut,temp) - if cut[idat] < ccut[nccut-1]: + if cut[idat] < ccut[nccut-1]: idat=idat+1 if idat >= ncut-1: zval = powint(ccdf[nccut-1],1.0,ccut[nccut-1],zmax,cdfval,1.) 
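For reference, every tail and middle branch in beyond() reduces to calls of the powint() interpolator defined earlier in this file; a minimal standalone sketch of its behaviour, with hypothetical interval values:

    def powint(xlow, xhigh, ylow, yhigh, xval, power):
        # Power-law interpolation between (xlow, ylow) and (xhigh, yhigh);
        # power = 1.0 is straight linear interpolation.
        if (xhigh - xlow) < 1.0e-20:
            return (yhigh + ylow) / 2.0
        return ylow + (yhigh - ylow) * (((xval - xlow) / (xhigh - xlow)) ** power)

    powint(0.25, 0.75, 10.0, 20.0, 0.5, 1.0)   # -> 15.0 (linear)
    powint(0.25, 0.75, 10.0, 20.0, 0.5, 0.5)   # -> ~17.07 (pulled toward yhigh)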
@@ -898,20 +902,20 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Figure out "lambda" and required info: lambd = math.pow(ccut[nccut],utpar)*(1.0-ccdf[nccut-1]) - if zval > UNEST: + if zval > UNEST: cdfval = 1.0 - (lambd/(math.pow(zval,utpar))) else: - zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) + zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) else: # Error situation - unacceptable option: ierr = 2 return -1 - + if zval < zmin: zval = zmin - if zval > zmax: + if zval > zmax: zval = zmax # All finished - return: @@ -926,19 +930,19 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct """ EPSLON = 1.0e-20 cur_index = ix + (iy)*nx -# print('krige at grid '); print(ix,iy) +# print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) # print('Check ixnode '); print(ixnode); print(iynode) nclose = len(close) - ncnode = (icnode >= 0).sum() + ncnode = (icnode >= 0).sum() # print('In kriging, maxcov = ' + str(maxcov)) # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # print(r.shape) # Local mean if lktype == 2: @@ -959,7 +963,7 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) - + # Set up kriging matrices: iin=-1 # acocunting for 0 origin # print('krige na' + str(na)) @@ -972,13 +976,13 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - if sec.shape[0] > 1: + if sec.shape[0] > 1: vrea[j]= sec[index]; else: vrea[j] = 0.0 # added this - no effect if lktype == 2: vra[j] = vra[j] - vrea[j] else: - + # It is a previously simulated node (keep index for table look-up): # print(j) index = j-(nclose) # adjust for 0 index @@ -986,15 +990,15 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct y1 = cnodey[index] vra[j] = cnodev[index] ind = icnode[index] -# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) +# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) ix1 = ix + (int(ixnode[ind])-nctx-1) iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) index = ix1 + (iy1-1)*nx - if lktype == 2: + if lktype == 2: vrea[j]= lvm[index] vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix + for i in range(0,na): # we need the full matrix # print('kriging indice populated' + str(j) + ',' + str(i)) # Sort out the actual location of point "i" if i < nclose: @@ -1040,37 +1044,37 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - r[j] = cov +# print('ix1,iy1'); print(x1,y1) + r[j] = cov else: # Try to use the covariance look-up (if the distance is in range): # ii = nctx + 1 + (ix - ix1) # jj = ncty + 1 + (iy - 
iy1) - + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) # print('ix,iy ='); print(ix,iy) # print('ix1,iy1'); print(ix1,iy1) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 # print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) +# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: # cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - +# print('ix1,iy1'); print(x1,y1) + r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] if lktype == 1: # we need the full array iin = iin + 1 - a[iin] = 1.0 + a[iin] = 1.0 if lktype == 4: # we need the full array iin = iin + 1 - a[iin] = colocorr*r[j] + a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: for i in range(0,na): @@ -1114,12 +1118,12 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct ii = na r[ii] = colocorr rr[ii] = r[ii] -# if (sfmax-sfmin) < EPSLON: +# if (sfmax-sfmin) < EPSLON: # neq = neq - 1 # colc = False # Solve the Kriging System: -# print('neq = ' + str(neq)); +# print('neq = ' + str(neq)); # print('a'); print(a) # print('r'); print(r) # print('data'); print(vra) @@ -1129,11 +1133,11 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct else: # print('neq prior ksol' + str(neq)) s = ksol_numpy(neq,a,r) -# print('neq post ksol' + str(neq)) +# print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) -# print('r'); print(r) +# print('r'); print(r) ising = 0 # need to figure this out # print('s'); print(s) @@ -1148,13 +1152,13 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # print('kriging weights'); print(s) cmean = 0.0 # print('cbb = ' + str(cbb)) - cstdev = cbb + cstdev = cbb sumwts = 0.0 for i in range(0,na): cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] - if lktype == 1: + if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) if lktype == 2: cmean = cmean + gmean @@ -1184,25 +1188,25 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n """ EPSLON = 1.0e-20 cur_index = ix + (iy)*nx -# print('krige at grid '); print(ix,iy) +# print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) # print('Check ixnode '); print(ixnode); print(iynode) nclose = len(close) - ncnode = (icnode >= 0).sum() + ncnode = (icnode >= 0).sum() # print('In kriging, maxcov = ' + str(maxcov)) # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # print(r.shape) -# Local mean # just pass the local probability as gmean +# Local mean # just pass the local probability as gmean # if lktype == 2: # gmean = lvm[cur_index] -# keep input gmean 
otherwise +# keep input gmean otherwise # Size of the kriging system: first = False @@ -1217,7 +1221,7 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) - + # print('kriging data close'); print(close) # print('kriging node close'); print(icnode) # Set up kriging matrices: @@ -1232,13 +1236,13 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) -# if lvm.shape[0] > 1: +# if lvm.shape[0] > 1: # vrea[j]= sec[index]; # else: vrea[j] = 0.0 # added this - no effect # if lktype == 2: vra[j] = vra[j] - vrea[j] # just using local variable mean not full residual approach else: - + # It is a previously simulated node (keep index for table look-up): # print(j) index = j-(nclose) # adjust for 0 index @@ -1246,15 +1250,15 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n y1 = cnodey[index] vra[j] = cnodev[index] ind = icnode[index] -# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) +# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) ix1 = ix + (int(ixnode[ind])-nctx-1) iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) index = ix1 + (iy1-1)*nx -# if lktype == 2: +# if lktype == 2: # vrea[j]= lvm[index] # vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix + for i in range(0,na): # we need the full matrix # print('kriging indice populated' + str(j) + ',' + str(i)) # Sort out the actual location of point "i" if i < nclose: @@ -1303,37 +1307,37 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - r[j] = cov +# print('ix1,iy1'); print(x1,y1) + r[j] = cov else: # Try to use the covariance look-up (if the distance is in range): # ii = nctx + 1 + (ix - ix1) # jj = ncty + 1 + (iy - iy1) - + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) # print('ix,iy ='); print(ix,iy) # print('ix1,iy1'); print(ix1,iy1) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 # print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) +# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: # cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - +# print('ix1,iy1'); print(x1,y1) + r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] if lktype == 1: # we need the full array iin = iin + 1 - a[iin] = 1.0 + a[iin] = 1.0 if lktype == 4: # we need the full array iin = iin + 1 - a[iin] = colocorr*r[j] + a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: for i in range(0,na): @@ -1377,12 +1381,12 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n ii = na r[ii] = colocorr rr[ii] = 
r[ii] -# if (sfmax-sfmin) < EPSLON: +# if (sfmax-sfmin) < EPSLON: # neq = neq - 1 # colc = False # Solve the Kriging System: -# print('Kriging equations neq = ' + str(neq)); +# print('Kriging equations neq = ' + str(neq)); # print('a'); print(a) # print('r'); print(r) # print('data'); print(vra) @@ -1392,11 +1396,11 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n else: # print('neq prior ksol' + str(neq)) s = ksol_numpy(neq,a,r) -# print('neq post ksol' + str(neq)) +# print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) -# print('r'); print(r) +# print('r'); print(r) ising = 0 # need to figure this out # print('s'); print(s) @@ -1411,13 +1415,13 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # print('kriging weights'); print(s) cmean = 0.0 # print('cbb = ' + str(cbb)) - cstdev = cbb + cstdev = cbb sumwts = 0.0 for i in range(0,na): cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] - if lktype == 1: + if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) # if lktype == 2: cmean = cmean + gmean @@ -1433,7 +1437,7 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n if lktype == 0 or lktype == 2: cmean = cmean + (1.0-sumwts)*gmean # print('cmean'); print(cmean) - + # Error message if negative variance: if cstdev < 0.0: # print('ERROR: Negative Variance: ' + str(cstdev)) @@ -1860,7 +1864,7 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): # Main loop over all pairs for i in range(0, nd): for j in range(0, nd): - + # Definition of the lag corresponding to the current pair dx = x[j] - x[i] dy = y[j] - y[i] @@ -1987,16 +1991,16 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): nd = len(df_extract) x = df_extract[xcol].values y = df_extract[ycol].values - vr = df_extract[vcol].values - + vr = df_extract[vcol].values + # Summary statistics for the data after trimming avg = vr.mean() stdev = vr.std() sills = stdev**2.0 ssq = sills vrmin = vr.min() - vrmax = vr.max() - + vrmax = vr.max() + # Initialize the summation arrays npp = np.zeros((nylag*2+1,nxlag*2+1)) gam = np.zeros((nylag*2+1,nxlag*2+1)) @@ -2006,11 +2010,11 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): tm = np.zeros((nylag*2+1,nxlag*2+1)) hv = np.zeros((nylag*2+1,nxlag*2+1)) tv = np.zeros((nylag*2+1,nxlag*2+1)) - - # First fix the location of a seed point: - for i in range(0,nd): - # Second loop over the data: - for j in range(0,nd): + + # First fix the location of a seed point: + for i in range(0,nd): + # Second loop over the data: + for j in range(0,nd): # The lag: ydis = y[j] - y[i] iyl = nylag + int(ydis/dylag) @@ -2019,7 +2023,7 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): xdis = x[j] - x[i] ixl = nxlag + int(xdis/dxlag) if ixl < 0 or ixl > nxlag*2: # acocunting for 0,...,n-1 array indexing - continue + continue # We have an acceptable pair, therefore accumulate all the statistics # that are required for the variogram: npp[iyl,ixl] = npp[iyl,ixl] + 1 # our ndarrays read from the base to top, so we flip @@ -2028,11 +2032,11 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): tv[iyl,ixl] = tm[iyl,ixl] + vr[i]*vr[i] hv[iyl,ixl] = hm[iyl,ixl] + vr[j]*vr[j] gam[iyl,ixl] = gam[iyl,ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j])) - + # Get average values for gam, hm, tm, hv, and tv, then 
compute # the correct "variogram" measure: - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): + for iy in range(0,nylag*2+1): + for ix in range(0,nxlag*2+1): if npp[iy,ix] <= minnp: gam[iy,ix] = -999. hm[iy,ix] = -999. @@ -2045,14 +2049,14 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): hm[iy,ix] = hm[iy,ix] / rnum tm[iy,ix] = tm[iy,ix] / rnum hv[iy,ix] = hv[iy,ix] / rnum - hm[iy,ix]*hm[iy,ix] - tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix] + tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix] # Attempt to standardize: if isill > 0: gamf[iy,ix] = gamf[iy,ix]/sills - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): + for iy in range(0,nylag*2+1): + for ix in range(0,nxlag*2+1): gamf[iy,ix] = gam[nylag*2-iy,ix] - nppf[iy,ix] = npp[nylag*2-iy,ix] + nppf[iy,ix] = npp[nylag*2-iy,ix] return gamf, nppf def vmodel( @@ -2064,27 +2068,27 @@ def vmodel( """GSLIB's VMODEL program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (Mar, 2019). - :param nlag: number of variogram lags + :param nlag: number of variogram lags :param xlag: size of the lags - :param axm: direction by 2D azimuth, 000 is y positive, 090 is x positive + :param axm: direction by 2D azimuth, 000 is y positive, 090 is x positive :param vario: dictionary with the variogram parameters :return: """ - + # Parameters MAXNST=4 - DEG2RAD=3.14159265/180.0 + DEG2RAD=3.14159265/180.0 MAXROT=MAXNST+1 EPSLON = 1.0e-20 VERSION= 1.01 - + # Declare arrays index = np.zeros(nlag+1) h = np.zeros(nlag+1) gam = np.zeros(nlag+1) cov = np.zeros(nlag+1) ro = np.zeros(nlag+1) - + # Load the variogram nst = vario["nst"] cc = np.zeros(nst) @@ -2092,7 +2096,7 @@ def vmodel( it = np.zeros(nst) ang = np.zeros(nst) anis = np.zeros(nst) - + c0 = vario["nug"] cc[0] = vario["cc1"] it[0] = vario["it1"] @@ -2105,14 +2109,14 @@ def vmodel( ang[1] = vario["azi2"] aa[1] = vario["hmaj2"] anis[1] = vario["hmin2"] / vario["hmaj2"] - + xoff = math.sin(DEG2RAD*azm)*xlag yoff = math.cos(DEG2RAD*azm)*xlag print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff)) - rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9) - - - xx = 0.0; yy = 0.0 + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9) + + + xx = 0.0; yy = 0.0 for il in range(0,nlag+1): index[il] = il cov[il] = cova2(0.0,0.0,xx,yy,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) @@ -2197,6 +2201,10 @@ def nscore( return ns, vr, wt_ns +_nums = [] +_dists = [] +_weights = [] + def kb2d( df, xcol, @@ -2244,31 +2252,31 @@ def kb2d( :param vario: :return: """ - + # Constants UNEST = -999. 
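    # UNEST is the sentinel for blocks that cannot be estimated; only
    # estimates above it are folded into the closing summary statistics.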
EPSLON = 1.0e-10 VERSION = 2.907 first = True - PMX = 9999.0 + PMX = 9999.0 MAXSAM = ndmax + 1 MAXDIS = nxdis * nydis MAXKD = MAXSAM + 1 MAXKRG = MAXKD * MAXKD - + # load the variogram nst = vario['nst'] cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) ang = np.zeros(nst); anis = np.zeros(nst) - - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; + + c0 = vario['nug']; + cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; + cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; - -# Allocate the needed memory: + +# Allocate the needed memory: xdb = np.zeros(MAXDIS) ydb = np.zeros(MAXDIS) xa = np.zeros(MAXSAM) @@ -2290,8 +2298,8 @@ def kb2d( x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values - -# Make a KDTree for fast search of nearest neighbours + +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,nd)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) @@ -2303,11 +2311,18 @@ def kb2d( vrmin = vr.min() vrmax = vr.max() + global _nums + global _dists + global _weights + _nums = [] + _dists = [] + _weights = [] + # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): ndb = nxdis * nydis - if ndb > MAXDIS: + if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') return kmap @@ -2315,10 +2330,10 @@ def kb2d( ydis = ysiz / max(float(nydis),1.0) xloc = -0.5*(xsiz+xdis) i = -1 # accounting for 0 as lowest index - for ix in range(0,nxdis): + for ix in range(0,nxdis): xloc = xloc + xdis yloc = -0.5*(ysiz+ydis) - for iy in range(0,nydis): + for iy in range(0,nydis): yloc = yloc + ydis i = i+1 xdb[i] = xloc @@ -2337,10 +2352,10 @@ def kb2d( if ndb <= 1: cbb = cov else: - for i in range(0,ndb): - for j in range(0,ndb): + for i in range(0,ndb): + for j in range(0,ndb): cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) - if i == j: + if i == j: cov = cov - c0 cbb = cbb + cov cbb = cbb/real(ndb*ndb) @@ -2350,22 +2365,22 @@ def kb2d( ak = 0.0 vk = 0.0 for iy in range(0,ny): - yloc = ymn + (iy-0)*ysiz + yloc = ymn + (iy-0)*ysiz for ix in range(0,nx): xloc = xmn + (ix-0)*xsiz current_node = (yloc,xloc) - + # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search # remove any data outside search radius na = len(dist) nums = nums[dist UNEST: @@ -2489,6 +2505,11 @@ def kb2d( ak = ak + est vk = vk + est*est + if xloc == 21 and yloc == 102: + _nums.append(nums.copy()) + _dists.append(dist.copy()) + _weights.append(s) + # END OF MAIN LOOP OVER ALL THE BLOCKS: if nk >= 1: @@ -2499,8 +2520,374 @@ def kb2d( return kmap, vmap +_nums = None +_dists = None +_weights = None + + +@jit(nopython=True) +def kb2d_jit_main( + tree, nd, x, y, 
+    vr,
+    xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin,
+    ndmax, radius, ktype, skmean, vario_int, vario_float):
+
+# Constants
+    UNEST = -999.
+    EPSLON = 1.0e-10
+    VERSION = 2.907
+    first = True
+    PMX = 9999.0
+    MAXSAM = ndmax + 1
+    MAXDIS = nxdis * nydis
+    MAXKD = MAXSAM + 1
+    MAXKRG = MAXKD * MAXKD
+
+    ndmax = min(ndmax, nd)
+
+# load the variogram
+    nst = vario_int['nst']
+    cc = np.zeros(nst)
+    aa = np.zeros(nst)
+    it = np.zeros(nst)
+    ang = np.zeros(nst)
+    anis = np.zeros(nst)
+
+# Allocate the needed memory:
+    xdb = np.zeros(MAXDIS)
+    ydb = np.zeros(MAXDIS)
+    xa = np.zeros(MAXSAM)
+    ya = np.zeros(MAXSAM)
+    vra = np.zeros(MAXSAM)
+#    dist = np.zeros(MAXSAM)
+#    nums = np.zeros(MAXSAM)
+    r = np.zeros(MAXKD)
+    rr = np.zeros(MAXKD)
+    s = np.zeros(MAXKD)
+    a = np.zeros(MAXKRG)
+    kmap = np.zeros((nx, ny))
+    vmap = np.zeros((nx, ny))
+
+    c0 = vario_float['nug']
+    cc[0] = vario_float['cc1']
+    it[0] = vario_int['it1']
+    ang[0] = vario_int['azi1']
+    aa[0] = vario_int['hmaj1']
+    anis[0] = vario_int['hmin1']/vario_int['hmaj1']
+
+    if nst == 2:
+        cc[1] = vario_float['cc2']
+        it[1] = vario_int['it2']
+        ang[1] = vario_int['azi2']
+        aa[1] = vario_int['hmaj2']
+        anis[1] = vario_int['hmin2']/vario_int['hmaj2']
+
+# Summary statistics for the data after trimming
+    avg = np.mean(vr)
+    stdev = np.std(vr)
+    ss = stdev ** 2.0
+    vrmin = np.min(vr)
+    vrmax = np.max(vr)
+
+# Set up the discretization points per block. Figure out how many
+# are needed, the spacing, and fill the xdb and ydb arrays with the
+# offsets relative to the block center (this only gets done once):
+    ndb = nxdis * nydis
+    if ndb > MAXDIS:
+        print('ERROR KB2D: Too many discretization points ')
+        print('            Increase MAXDIS or lower n[xy]dis')
+#        return kmap
+    xdis = xsiz / max(float(nxdis), 1.0)
+    ydis = ysiz / max(float(nydis), 1.0)
+    xloc = -0.5 * (xsiz + xdis)
+    i = -1  # accounting for 0 as lowest index
+    for ix in range(0, nxdis):
+        xloc = xloc + xdis
+        yloc = -0.5 * (ysiz + ydis)
+        for iy in range(0, nydis):
+            yloc = yloc + ydis
+            i += 1
+            xdb[i] = xloc
+            ydb[i] = yloc
+
+# Initialize accumulators:
+    cbb = 0.0
+    rad2 = radius * radius
+
+# Calculate Block Covariance. Check for point kriging.
+    rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX)
+    cov = cova2(xdb[0], ydb[0], xdb[0], ydb[0], nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov)
+# Keep this value to use for the unbiasedness constraint:
+    unbias = cov
+    first = False
+    if ndb <= 1:
+        cbb = cov
+    else:
+        for i in range(0, ndb):
+            for j in range(0, ndb):
+                cov = cova2(xdb[i], ydb[i], xdb[j], ydb[j], nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov)
+                if i == j:
+                    cov = cov - c0
+                cbb = cbb + cov
+    # print(real)
+    # cbb = cbb/real(ndb*ndb)  # TODO: undefined?
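# `real()` in the TODO above is leftover Fortran; when block discretization
# is used (ndb > 1) the block-average covariance would be normalized in
# Python as, for example:
#
#     cbb = cbb / float(ndb * ndb)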
+ +# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: + nk = 0 + ak = 0.0 + vk = 0.0 + + for iy in range(0,ny): + yloc = ymn + (iy-0)*ysiz + for ix in range(0,nx): + xloc = xmn + (ix-0)*xsiz + current_node = (yloc, xloc) + +# Find the nearest samples within each octant: First initialize +# the counter arrays: + na = -1 # accounting for 0 as first index +# dist.fill(1.0e+20) +# nums.fill(-1) + # dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search + dist, nums = tree[current_node] + # remove any data outside search radius + na = len(dist) + nums = nums[dist UNEST: + nk = nk + 1 + ak = ak + est + vk = vk + est*est + + if nk >= 1: + ak = ak / float(nk) + vk = vk/float(nk) - ak*ak +# print(' Estimated ' + str(nk) + ' blocks ') +# print(' average ' + str(ak) + ' variance ' + str(vk)) + + return kmap, vmap + + +def kb2d_jit( + df, + xcol, + ycol, + vcol, + tmin, + tmax, + nx, + xmn, + xsiz, + ny, + ymn, + ysiz, + nxdis, + nydis, + ndmin, + ndmax, + radius, + ktype, + skmean, + vario, +): + """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the + original Fortran to Python by Michael Pyrcz, the University of Texas at + Austin (Jan, 2019). + :param df: pandas DataFrame with the spatial data + :param xcol: name of the x coordinate column + :param ycol: name of the y coordinate column + :param vcol: name of the property column + :param tmin: property trimming limit + :param tmax: property trimming limit + :param nx: definition of the grid system (x axis) + :param xmn: definition of the grid system (x axis) + :param xsiz: definition of the grid system (x axis) + :param ny: definition of the grid system (y axis) + :param ymn: definition of the grid system (y axis) + :param ysiz: definition of the grid system (y axis) + :param nxdis: number of discretization points for a block + :param nydis: number of discretization points for a block + :param ndmin: minimum number of data points to use for kriging a block + :param ndmax: maximum number of data points to use for kriging a block + :param radius: maximum isotropic search radius + :param ktype: + :param skmean: + :param vario: + :return: + """ +# Load the data + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + nd = len(df_extract) + x = df_extract[xcol].values + y = df_extract[ycol].values + vr = df_extract[vcol].values + +# Make a KDTree for fast search of nearest neighbours + # dp = list((y[i], x[i]) for i in range(0,nd)) + data_locs = np.column_stack((y,x)) + import scipy.spatial as sp + _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + + vario_int = Dict.empty( + key_type = nb.types.unicode_type, + value_type = nb.types.i8 + ) + + vario_float = Dict.empty( + key_type = nb.types.unicode_type, + value_type = nb.f8 + ) + + vario_float['nug'] = vario.pop('nug') + vario_float['cc1'] = vario.pop('cc1') + vario_float['cc2'] = vario.pop('cc2') + + for key in vario.keys(): + vario_int[key] = vario[key] + + tree = Dict.empty( + key_type = nb.types.Tuple((nb.f8, nb.f8)), + value_type = nb.types.Tuple((nb.f8[:], nb.i4[:])) + ) + + for iy in range(0,ny): + yloc = ymn + (iy-0)*ysiz + for ix in range(0,nx): + xloc = xmn + (ix-0)*xsiz + current_node = (yloc,xloc) + tree[current_node] = _tree.query(current_node, ndmax, distance_upper_bound=radius) + + kmap, vmap = kb2d_jit_main( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, 
radius, ktype, skmean, vario_int, vario_float) + + return kmap, vmap + + def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xmn,xsiz,ny,ymn,ysiz,ndmin,ndmax,radius,ktype,vario): - + """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). @@ -2531,7 +2918,7 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm :param vario: list with all of the indicator variograms (sill of 1.0) in consistent order with above parameters :return: """ - + # Find the needed paramters: PMX = 9999.9 MAXSAM = ndmax + 1 @@ -2539,20 +2926,20 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm mik = 0 # full indicator kriging use_trend = False if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: use_trend = True - + # load the variogram MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) - aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) + nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) + aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) ang = np.zeros((MAXNST,ncut)); anis = np.zeros((MAXNST,ncut)) for icut in range(0,ncut): nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; - ang[0,icut] = vario[icut]['azi1']; + c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; + ang[0,icut] = vario[icut]['azi1']; aa[0,icut] = vario[icut]['hmaj1']; anis[0,icut] = vario[icut]['hmin1']/vario[icut]['hmaj1']; if nst[icut] == 2: - cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; + cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; aa[1,icut] = vario[icut]['hmaj2']; anis[1,icut] = vario[icut]['hmin2']/vario[icut]['hmaj2']; # Load the data @@ -2563,8 +2950,8 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm MAXROT = MAXNST*MAXCUT+ 1 ikout = np.zeros((nx,ny,ncut)) maxcov = np.zeros(ncut) - - # Allocate the needed memory: + + # Allocate the needed memory: xa = np.zeros(MAXSAM) ya = np.zeros(MAXSAM) vra = np.zeros(MAXSAM) @@ -2576,37 +2963,37 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm a = np.zeros(MAXEQ*MAXEQ) ikmap = np.zeros((nx,ny,ncut)) vr = np.zeros((MAXDAT,MAXCUT+1)) - + nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) xviol = np.zeros(MAXCUT) - + ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) ikout = np.zeros((nx,ny,ncut)) - + x = df_extract[xcol].values y = df_extract[ycol].values v = df_extract[vcol].values - + # The indicator data are constructed knowing the thresholds and the # data value. 
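    # A standalone sketch of this coding with hypothetical values (note that
    # the '1'/'0' written below are string literals that numpy casts back to
    # float on assignment; numeric 1.0/0.0 would be cleaner):
    #
    #     v = np.array([0.8, 2.3, 4.1]); thresh = [1.0, 3.0]
    #     cat = [np.where((v <= t + 0.5) & (v > t - 0.5), 1.0, 0.0) for t in thresh]  # ivtype == 0
    #     con = [np.where(v <= t, 1.0, 0.0) for t in thresh]                          # continuous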
- + if ivtype == 0: - for icut in range(0,ncut): + for icut in range(0,ncut): vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): + for icut in range(0,ncut): vr[:,icut] = np.where(v <= thresh[icut], '1', '0') vr[:,ncut] = v -# Make a KDTree for fast search of nearest neighbours +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,MAXDAT)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - + # Summary statistics of the input data - + avg = vr[:,ncut].mean() stdev = vr[:,ncut].std() ss = stdev**2.0 @@ -2615,20 +3002,20 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - + actloc = np.zeros(MAXDAT, dtype = int) for i in range(1,MAXDAT): actloc[i] = i - + # Set up the rotation/anisotropy matrices that are needed for the # variogram and search: print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): + for ic in range(0,ncut): rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[:,ic],cc[:,ic],ang[:,ic],9999.9) - rotmat.append(rotmat_temp) + rotmat.append(rotmat_temp) # Initialize accumulators: # not setup yet nk = 0 xk = 0.0 @@ -2637,11 +3024,11 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm nviol[icut] = 0 aviol[icut] = 0.0 xviol[icut] = -1.0 - nxy = nx*ny + nxy = nx*ny print('Working on the kriging') # Report on progress from time to time: - if koption == 0: + if koption == 0: nxy = nx*ny nloop = nxy irepo = max(1,min((nxy/10),10000)) @@ -2649,14 +3036,14 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm nloop = 10000000 irepo = max(1,min((nd/10),10000)) ddh = 0.0 - + # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: for index in range(0,nloop): - + if (int(index/irepo)*irepo) == index: print(' currently on estimate ' + str(index)) - + if koption == 0: - iy = int((index)/nx) + iy = int((index)/nx) ix = index - (iy)*nx xloc = xmn + (ix)*xsiz yloc = ymn + (iy)*ysiz @@ -2672,8 +3059,8 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm dist, close = tree.query(current_node,ndmax) # use kd tree for fast nearest data search # remove any data outside search radius close = close[dist 50: MAXSBX=50 MAXSBY = 1 - if ny > 1: + if ny > 1: MAXSBY = int(ny/2) if MAXSBY > 50: MAXSBY=50 MAXSBZ = 1 MAXSB = MAXSBX*MAXSBY*MAXSBZ - + # Declare arrays dist = np.zeros(ndmax) nums = np.zeros(ndmax,dtype = int) - + # Perform some quick checks if nx > MAXX or ny> MAXY: print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') @@ -2841,7 +3228,7 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc if utail != 1 and utail != 2 and utail != 4: print('ERROR invalid upper tail option ' + str(ltail)) print(' only allow 1,2 or 4 - see GSLIB manual ') - return sim + return sim if utail == 4 and utpar < 1.0: print('ERROR invalid power for hyperbolic tail' + str(utpar)) print(' must be greater than 1.0!') @@ -2850,11 +3237,11 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc print('ERROR invalid power for power model' + str(ltpar)) print(' must be greater than 0.0!') return sim - if utail == 2 and utpar < 0.0: + if utail == 2 and 
utpar < 0.0: print('ERROR invalid power for power model' + str(utpar)) print(' must be greater than 0.0!') return sim - + # Load the data df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax nd = len(df_extract) @@ -2865,8 +3252,8 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc vr_orig = np.copy(vr) # print('size of data extract'); print(len(vr)) wt = []; wt = np.array(wt) - if wcol > -1: - wt = df_extract[wcol].values + if wcol > -1: + wt = df_extract[wcol].values else: wt = np.ones(nd) sec = []; sec = np.array(sec) @@ -2874,19 +3261,19 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc sec = df_extract[scol].values if itrans == 1: if ismooth == 1: - dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] + dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] ntr = len(dftrans_extract) vrtr = dftrans_extrac[tcol].values - if twtcol > -1: + if twtcol > -1: vrgtr = dftrans_extrac[tcol].values else: - vrgtr = np.ones(ntr) + vrgtr = np.ones(ntr) else: vrtr = df_extract[vcol].values - ntr = len(df_extract) + ntr = len(df_extract) vrgtr = np.copy(wt) twt = np.sum(vrgtr) -# sort +# sort vrtr,vrgtr = dsortem(0,ntr,vrtr,2,b=vrgtr) # Compute the cumulative probabilities and write transformation table @@ -2894,33 +3281,33 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc oldcp = 0.0 cp = 0.0 # print('ntr'); print(ntr) - for j in range(0,ntr): + for j in range(0,ntr): cp = cp + vrgtr[j]/twt w = (cp + oldcp)*0.5 vrg = gauinv(w) oldcp = cp # Now, reset the weight to the normal scores value: vrgtr[j] = vrg - - twt = np.sum(wt) + + twt = np.sum(wt) # Normal scores transform the data for id in range(0,nd): - if itrans == 1: + if itrans == 1: vrr = vr[id] j = dlocate(vrtr,1,nd,vrr) j = min(max(0,j),(nd-2)) vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) if vrg < vrgtr[0]: vrg = vrgtr[0] if(vrg > vrgtr[nd-1]): vrg = vrgtr[nd-1] - vr[id] = vrg - + vr[id] = vrg + weighted_stats_orig = DescrStatsW(vr_orig,weights=wt) - orig_av = weighted_stats_orig.mean - orig_ss = weighted_stats_orig.var - + orig_av = weighted_stats_orig.mean + orig_ss = weighted_stats_orig.var + weighted_stats = DescrStatsW(vr,weights=wt) - av = weighted_stats.mean - ss = weighted_stats.var + av = weighted_stats.mean + ss = weighted_stats.var print('\n Data for SGSIM: Number of acceptable data = ' + str(nd)) print(' Number trimmed = ' + str(len(df)- nd)) @@ -2928,13 +3315,13 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc print(' Weighted Variance = ' + str(round(orig_ss,4))) print(' Weighted Transformed Average = ' + str(round(av,4))) print(' Weighted Transformed Variance = ' + str(round(ss,4))) - -# Read in secondary data + +# Read in secondary data sim = np.random.rand(nx*ny) index = 0 for ixy in range(0,nxy): sim[index] = index - + lvm = []; lvm = np.array(lvm) if ktype >= 2: #lvm = np.copy(sec_map.flatten()) @@ -2942,10 +3329,10 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc lvm = np.zeros(nxy) for iy in range(0,ny): for ix in range(0,nx): - lvm[ind] = sec_map[ny-iy-1,ix] + lvm[ind] = sec_map[ny-iy-1,ix] ind = ind + 1 - if ktype == 2 and itrans == 1: - for ixy in range(0,nxy): + if ktype == 2 and itrans == 1: + for ixy in range(0,nxy): # Do we to transform the secondary variable for a local mean? 
vrr = lvm[ixy] j = dlocate(vrtr,1,ntr,vrr) @@ -2953,17 +3340,17 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) if vrg < vrgtr[0]: vrg = vrgtr[0] if(vrg > vrgtr[ntr-1]): vrg = vrgtr[nd-1] - lvm[ixy] = vrg + lvm[ixy] = vrg av = np.average(lvm) ss = np.var(lvm) print(' Secondary Data: Number of data = ' + str(nx*ny)) print(' Equal Weighted Average = ' + str(round(av,4))) - print(' Equal Weighted Variance = ' + str(round(ss,4))) + print(' Equal Weighted Variance = ' + str(round(ss,4))) # Do we need to work with data residuals? (Locally Varying Mean) if ktype == 2: sec = np.zeros(nd) - for idd in range(0,nd): + for idd in range(0,nd): ix = getindex(nx,xmn,xsiz,x[idd]) iy = getindex(ny,ymn,ysiz,y[idd]) index = ix + (iy-1)*nx @@ -2973,7 +3360,7 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # Do we need to get an external drift attribute for the data? if ktype == 3: - for idd in range(0,nd): + for idd in range(0,nd): if sec[i] != UNEST: ix = getindx(nx,xmn,xsiz,x[idd]) iy = getindx(ny,ymn,ysiz,y[idd]) @@ -2984,26 +3371,26 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc if ktype == 4: order_sec = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): - order_sec[ixy] = ind + for ixy in range(0,nxy): + order_sec[ixy] = ind ind = ind + 1 print(' Transforming Secondary Data with') - print(' variance reduction of ' + str(varred)) + print(' variance reduction of ' + str(varred)) lvm,order_sec = dsortem(0,nxy,lvm,2,b=order_sec) oldcp = 0.0 cp = 0.0 - for i in range(0,nxy): + for i in range(0,nxy): cp = cp + (1.0/(nxy)) w = (cp + oldcp)/2.0 lvm[i] = gauinv(w) lvm[i] = lvm[i] * varred oldcp = cp - order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm) + order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm) # return np.reshape(lvm,(ny,nx)) # check the transform # Set up the rotation/anisotropy matrices that are needed for the # variogram and search. 
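    # setrot builds the rotation matrices used by both the variogram (one per
    # nested structure) and, via sang1/sanis1, the anisotropic search ellipse.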
- print('Setting up rotation matrices for variogram and search') + print('Setting up rotation matrices for variogram and search') if nst == 1: rotmat = setrot(ang[0],ang[0],sang1,anis[0],anis[0],sanis1,nst,MAXROT=2) else: @@ -3012,46 +3399,46 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc rotmat_2d, maxcov = setup_rotmat2(c0,nst,it,cc,ang) # will use one in the future # print('MaxCov = ' + str(maxcov)) - -# Make a KDTree for fast search of nearest neighbours + +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,nd)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - + # Set up the covariance table and the spiral search: cov_table,tmp,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,rotmat,radsqd) - + # print('Covariance Table'); print(cov_table) # MAIN LOOP OVER ALL THE SIMULAUTIONS: for isim in range(0,nsim): - + # Work out a random path for this realization: sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): - order[ixy] = ind + for ixy in range(0,nxy): + order[ixy] = ind ind = ind + 1 - + # The multiple grid search works with multiples of 4 (yes, that is # somewhat arbitrary): if mults == 1: - for imult in range(0,nmult): + for imult in range(0,nmult): nny = int(max(1,ny/((imult+1)*4))) nnx = int(max(1,nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) jy = 1 jx = 1 - for iy in range(0,nny): + for iy in range(0,nny): if nny > 0: jy = iy*(imult+1)*4 for ix in range(0,nnx): if nnx > 0: jx = ix*(imult+1)*4 index = jx + (jy-1)*nx sim[index] = sim[index] - (imult+1) - + # Initialize the simulation: sim, order = dsortem(0,nxy,sim,2,b=order) sim.fill(UNEST) @@ -3064,7 +3451,7 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # print('data'); print(x[idd],y[idd]) ix = getindex(nx,xmn,xsiz,x[idd]) iy = getindex(ny,ymn,ysiz,y[idd]) - ind = ix + (iy-1)*nx + ind = ix + (iy-1)*nx xx = xmn + (ix)*xsiz yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) @@ -3075,7 +3462,7 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc if sim[ind] > 0.0: id2 = int(sim[ind]+0.5) test2 = abs(xx-x(id2)) + abs(yy-y(id2)) - if test <= test2: + if test <= test2: sim[ind] = idd else: sim[ind] = id2 @@ -3084,25 +3471,25 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc if sstrat == 0 and test <= TINY: sim[ind]=10.0*UNEST # Now, enter data values into the simulated grid: - for ind in range(0,nxy): + for ind in range(0,nxy): idd = int(sim[ind]+0.5) if idd > 0: sim[ind] = vr[id] - irepo = max(1,min((nxy/10),10000)) + irepo = max(1,min((nxy/10),10000)) # MAIN LOOP OVER ALL THE NODES: - for ind in range(0,nxy): + for ind in range(0,nxy): if (int(ind/irepo)*irepo) == ind: print(' currently on node ' + str(ind)) - + # Figure out the location of this point and make sure it has # not been assigned a value already: index = int(order[ind]+0.5) if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) + iy = int((index)/nx) ix = index - (iy)*nx xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + yy = ymn + (iy)*ysiz current_node = (yy,xx) # print('Current_node'); print(current_node) @@ -3120,22 +3507,22 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc else: dist, nums = 
tree.query(current_node,ndmax) # remove any data outside search radius - + # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) nums = nums[dist 1: MAXSBY = int(ny/2) - if MAXSBY > 50: + if MAXSBY > 50: MAXSBY=50 # print('ncut'); print(ncut) - + # load the variogram MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST)) - aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int) + nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST)) + aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int) ang = np.zeros((ncut,MAXNST)); anis = np.zeros((ncut,MAXNST)) # print('varios - 1 vario'); print(vario[1]) - + for icut in range(0,ncut): # print('icut'); print(icut) nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1']; - ang[icut,0] = vario[icut]['azi1']; + c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1']; + ang[icut,0] = vario[icut]['azi1']; aa[icut,0] = vario[icut]['hmaj1']; anis[icut,0] = vario[icut]['hmin1']/vario[icut]['hmaj1']; if nst[icut] == 2: - cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2']; + cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2']; aa[icut,1] = vario[icut]['hmaj2']; anis[icut,1] = vario[icut]['hmin2']/vario[icut]['hmaj2']; - + # print('check loaded cov model- icut '); print(icut) # print(cc[icut],aa[icut],it[icut],ang[icut],anis[icut]) - - - + + + # Load the data df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax MAXDAT = len(df_extract) @@ -3357,8 +3744,8 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin MAXROT = MAXNST*MAXCUT+ 1 ikout = np.zeros((nx,ny,ncut)) maxcov = np.zeros(ncut) - - # Allocate the needed memory: + + # Allocate the needed memory: xa = np.zeros(MAXSAM) ya = np.zeros(MAXSAM) vra = np.zeros(MAXSAM) @@ -3370,42 +3757,42 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin a = np.zeros(MAXEQ*MAXEQ) ikmap = np.zeros((nx,ny,ncut)) vr = np.zeros((MAXDAT,MAXCUT+1)) - + nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) xviol = np.zeros(MAXCUT) - + ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) ikout = np.zeros((nx,ny,ncut)) - + x = df_extract[xcol].values y = df_extract[ycol].values v = df_extract[vcol].values - - MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used + + MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used gcut = np.zeros(MAXTAB) - + # The indicator data are constructed knowing the thresholds and the # data value. 
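For reference, a minimal standalone sketch of the indicator coding performed here (the names v, thresh, and ivtype mirror the code below; casting booleans to float sidesteps the string choices '1'/'0' passed to np.where in the diff):

    import numpy as np

    def indicator_transform(v, thresh, ivtype):
        # One indicator column per threshold (or category), plus the raw values.
        ncut = len(thresh)
        vr = np.zeros((len(v), ncut + 1))
        for icut in range(ncut):
            if ivtype == 0:
                # categorical: 1 if the value falls in this category (within +/- 0.5)
                vr[:, icut] = ((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5)).astype(float)
            else:
                # continuous: 1 if the value is at or below this threshold
                vr[:, icut] = (v <= thresh[icut]).astype(float)
        vr[:, ncut] = v  # keep the original values in the last column
        return vr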
- + # print('ncut'); print(ncut) if ivtype == 0: - for icut in range(0,ncut): + for icut in range(0,ncut): vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): + for icut in range(0,ncut): vr[:,icut] = np.where(v <= thresh[icut], '1', '0') vr[:,ncut] = v # print('loaded data '); print(vr) -# Make a KDTree for fast search of nearest neighbours +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,MAXDAT)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - + # Summary statistics of the input data - + avg = vr[:,ncut].mean() stdev = vr[:,ncut].std() ss = stdev**2.0 @@ -3414,37 +3801,37 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - + actloc = np.zeros(MAXDAT, dtype = int) # need to set up data at node locations for i in range(1,MAXDAT): actloc[i] = i - + # Set up the rotation/anisotropy matrices that are needed for the # variogram and search: print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): + for ic in range(0,ncut): rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[ic],cc[ic],ang[ic],9999.9) - rotmat.append(rotmat_temp) - + rotmat.append(rotmat_temp) + #return rotmat # Set up the covariance table and the spiral search based just on the first variogram # This is ok as we are not using the covariance look up table, just spiral search for previous nodes isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here - + # print('ang[0]'); print(ang[0]) - + if nst[0] == 1: global_rotmat = setrot(ang[0,0],ang[0,0],sang1,anis[0,0],anis[0,0],sanis1,nst[0],MAXROT=2) else: global_rotmat = setrot(ang[0,0],ang[1,0],sang1,anis[0,0],anis[1,0],sanis1,nst[0],MAXROT=2) - + cov_table,tmp2,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) - - + xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) + + # print('spiral search number nodes '); print(nlooku) # print('ixnode,iynode'); print(ixnode,iynode) # Initialize accumulators: # not setup yet @@ -3459,7 +3846,7 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('Working on the kriging') # Report on progress from time to time: - if koption == 0: + if koption == 0: nxy = nx*ny nloop = nxy irepo = max(1,min((nxy/10),10000)) @@ -3467,43 +3854,43 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin nloop = 10000000 irepo = max(1,min((nd/10),10000)) ddh = 0.0 - + # MAIN LOOP OVER ALL THE SIMULATIONS: # for isim in range(0,nsim): # will add multiple realizations soon - + # Work out a random path for this realization: sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): - order[ixy] = ind + for ixy in range(0,nxy): + order[ixy] = ind ind = ind + 1 - + # Multiple grid search works with multiples of 4 (yes, that is # somewhat arbitrary): if mults == 1: - for imult in range(0,nmult): + for imult in range(0,nmult): nny = int(max(1,ny/((imult+1)*4))) nnx = int(max(1,nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) jy = 1 jx = 1 - for iy in range(0,nny): + for iy in range(0,nny): if
nny > 0: jy = iy*(imult+1)*4 for ix in range(0,nnx): if nnx > 0: jx = ix*(imult+1)*4 index = jx + (jy-1)*nx sim[index] = sim[index] - (imult+1) - + # Initialize the simulation: sim, order = dsortem(0,nxy,sim,2,b=order) sim.fill(UNEST) tmp.fill(0.0) print('Working on a single realization, seed ' + str(seed)) # print('Random Path'); print(order) - + # Assign the data to the closest grid node: TINY = 0.0001 @@ -3511,48 +3898,48 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('data'); print(x[idd],y[idd]) ix = getindex(nx,xmn,xsiz,x[idd]) iy = getindex(ny,ymn,ysiz,y[idd]) - ind = ix + (iy-1)*nx + ind = ix + (iy-1)*nx xx = xmn + (ix)*xsiz yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) test = abs(xx-x[idd]) + abs(yy-y[idd]) # Assign this data to the node (unless there is a closer data): - if sstrat == 1 or (sstrat == 0 and test <= TINY): + if sstrat == 1 or (sstrat == 0 and test <= TINY): if sim[ind] > UNEST: id2 = int(sim[ind]+0.5) test2 = abs(xx-x[id2]) + abs(yy-y[id2]) - if test <= test2: + if test <= test2: sim[ind] = idd else: sim[ind] = idd # Assign a flag so that this node does not get simulated: - + # Now, enter data values into the simulated grid: - for ind in range(0,nxy): + for ind in range(0,nxy): idd = int(sim[ind]+0.5) - if idd > 0: + if idd > 0: sim[ind] = vr[idd] - else: + else: tmp[ind] = sim[ind] sim[ind] = UNEST - irepo = max(1,min((nxy/10),10000)) + irepo = max(1,min((nxy/10),10000)) # LOOP OVER ALL THE NODES: - for ind in range(0,nxy): + for ind in range(0,nxy): if (int(ind/irepo)*irepo) == ind: print(' currently on node ' + str(ind)) - + # Find the index on the random path, check if assigned data and get location index = int(order[ind]+0.5) if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) + iy = int((index)/nx) ix = index - (iy)*nx xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + yy = ymn + (iy)*ysiz current_node = (yy,xx) # print('Current_node'); print(current_node) @@ -3570,34 +3957,34 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin else: dist, nums = tree.query(current_node,ndmax) # remove any data outside search radius - + # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) nums = nums[dist 0: - for icut in range(0,ncut): + for icut in range(0,ncut): cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): + for icut in range(0,ncut): cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') cnodeiv[ncut,:] = cnodev # print('indicator transformed nearest nodes'); print(cnodeiv) - + # print('srchnd'); print(ncnode,icnode,cnodev,cnodex,cnodey) # print('Result of srchnd, cnodex = '); print(cnodex) nclose = na @@ -3614,8 +4001,8 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # check inputs # print('nst'); print(nst) - - + + if nclose + ncnode <= 0: # print('nclose & ncnode'); print(nclose, ncnode) zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) @@ -3627,9 +4014,9 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('check kriging cov model- icut '); print(ic) # print('node data values for kriging'); print(cnodev) # print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) - #ccdf([ic] =
krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) if ktype == 0: - gmean = gcdf[ic] + gmean = gcdf[ic] elif ktype == 2: gmean = trend1d[index,ic] else: @@ -3637,27 +4024,27 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('gmean'); print(gmean) ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], - rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) + rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) # print('ccdf'); print(ccdf) - + # Correct order relations: ccdfo = ordrel(ivtype,ncut,ccdf) # Draw from the local distribution: zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) sim[index] = zval # print('zval'); print(zval) - + # END MAIN LOOP OVER SIMULATIONS: sim_out = np.zeros((ny,nx)) for ind in range(0,nxy): - iy = int((ind)/nx) + iy = int((ind)/nx) ix = ind - (iy)*nx sim_out[ny-iy-1,ix] = sim[ind] - return sim_out - - + return sim_out + + def kb2d_locations( df, xcol, @@ -3695,30 +4082,30 @@ def kb2d_locations( :param vario: :return: """ - + # Constants UNEST = -999. EPSLON = 1.0e-10 VERSION = 2.907 first = True - PMX = 9999.0 + PMX = 9999.0 MAXSAM = ndmax + 1 MAXKD = MAXSAM + 1 MAXKRG = MAXKD * MAXKD - + # load the variogram nst = vario['nst'] cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) ang = np.zeros(nst); anis = np.zeros(nst) - - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; + + c0 = vario['nug']; + cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; + cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; - -# Allocate the needed memory: + +# Allocate the needed memory: xa = np.zeros(MAXSAM) ya = np.zeros(MAXSAM) vra = np.zeros(MAXSAM) @@ -3738,14 +4125,14 @@ def kb2d_locations( x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values - + # Load the estimation loactions nd_loc = len(df_loc) x_loc = df_loc[xcol].values y_loc = df_loc[ycol].values vr_loc = df_loc[vcol].values - -# Make a KDTree for fast search of nearest neighbours + +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,nd)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) @@ -3773,13 +4160,13 @@ def kb2d_locations( nk = 0 ak = 0.0 vk = 0.0 - + for idata in range(len(df_loc)): print('Working on location ' + str(idata)) xloc = x_loc[idata] - yloc = y_loc[idata] + yloc = y_loc[idata] current_node = (yloc,xloc) - + # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index @@ -3789,8 +4176,8 @@ def kb2d_locations( # remove any data outside search radius na = len(dist) nums = nums[dist= 0.0)&(ang1<270.0): - alpha = (90.0 - ang1) * DEG2RAD - else: - alpha = (450.0 - ang1) * DEG2RAD - beta = -1.0 * ang2 *DEG2RAD - theta = ang3 * DEG2RAD - - sina = np.sin(alpha) - sinb = np.sin(beta) - sint = np.sin(theta) - cosa = np.cos(alpha) - cosb = np.cos(beta) - cost = np.cos(theta) - ### Construct the rotation matrix 
in the required memory - - afac1 = 1.0/max(anis1, EPSLON) - afac2 = 1.0/max(anis2, EPSLON) - rotmat[ind,0,0] = cosb * cosa - rotmat[ind,0,1] = cosb * sina - rotmat[ind,0,2] = -sinb - rotmat[ind,1,0] = afac1*(-cost*sina + sint*sinb*cosa) - rotmat[ind,1,1] = afac1*(cost*cosa + sint*sinb*sina) - rotmat[ind,1,2] = afac1*( sint * cosb) - rotmat[ind,2,0] = afac2*(sint*sina + cost*sinb*cosa) - rotmat[ind,2,1] = afac2*(-sint*cosa + cost*sinb*sina) - rotmat[ind,2,2] = afac2*(cost * cosb) - - return rotmat - -def cova3(x1,y1,z1,x2,y2,z2,nst,c0,it,cc,aa,rotmat,cmax,ivarg=1,irot=0, MAXNST=4): - """Covariance Between Two Points - 3D - This function calculated the covariance associated with a variogram - model specified by a nugget effect and nested varigoram structures. - The anisotropy definition can be different for each nested structure.""" - - """ - Converted from original fortran GSLIB (Deutsch and Journel, 1998) to Python by Wendi Liu, University of Texas at Austin - - INPUT VARIABLES: - - x1,y1,z1 coordinates of first point - x2,y2,z2 coordinates of second point - nst number of nested structures (maximum of 4) - ivarg variogram number (set to 1 unless doing cokriging - or indicator kriging) - MAXNST size of variogram parameter arrays - c0 isotropic nugget constant - it type of each nested structure: - 1. spherical model of range a; - 2. exponential model of parameter a; - i.e. practical range is 3a - 3. gaussian model of parameter a; - i.e. practical range is a*sqrt(3) - 4. power model of power a (a must be greater than 0 and - less than 2). if linear model, a=1,c=slope. - cc multiplicative factor of each nested structure. - (sill-c0) for spherical, exponential,and gaussian - slope for linear model. - aa parameter "a" of each nested structure. - irot index of the rotation matrix for the first nested - structure (irot starts from 0; the second nested structure will use - irot+1, the third irot+2, and so on) - rotmat rotation matrices""" - - EPSLON = 1e-10 - PMX=1e10 - ### Calculate the maximum covariance value (used for zero distances and for power model covariance): - istart = 1+ (ivarg-1) * MAXNST - - for ii in range(nst):##nst[ivarg-1] if ivarg>1 - ist = istart+ ii-1 - if it[ist] == 4: - cmax = cmax+PMX - else: - cmax = cmax+cc[ist] - - # Check for very small distance - hsqd = sqdist3(x1,y1,z1,x2,y2,z2,irot,rotmat) - if hsqd < EPSLON: - cova = cmax - return cmax,cova - - # Non-zero distance, loop over all the structures - cova = 0.0 - for js in range(nst):##nst[ivarg-1] if ivarg>1 - ist = istart+js-1 - # Compute the appropriate structural distance -# if ist!=0: -# ir = min((irot+js),MAXROT) -# hsqd = sqdist(x1,y1,z1,x2,y2,z2,ir,MAXROT,rotmat) - h = np.sqrt(hsqd) - if it[ist] == 1: ##Spherical - hr = h/aa[ist] - if hr<1: - cova+=cc[ist]*(1.0-hr*(1.5-0.5*hr*hr)) - elif it[ist] == 2: ##Exponential - cova += cc[ist]*np.exp(-3.0*h/aa[ist]) - elif it[ist] == 3: ##Gaussian - cova += cc[ist]*np.exp(-(3.0*h/aa[ist])*(3.0*h/aa[ist])) - elif it[ist] == 4: ##Power - cova += cmax-cc[ist]*h**aa[ist] - - return cmax, cova - -def gammabar(xsiz, ysiz, zsiz,nst,c0,it,cc,hmaj,hmin,hvert): - """This program calculates the gammabar value from a 3D semivariogram model""" - """Converted from original fortran GSLIB (Deutsch and Journel, 1998) to Python by Wendi Liu, University of Texas at Austin""" - - ###Initialization - rotmat = np.zeros((5, 3, 3)) - EPSLON = 1.0e-20 - MAXNST=4 - maxcov=1.0 - cmax = c0 - nx = 3 - ny = 3 - nz = 6 - ang1 = np.zeros((MAXNST,)) #azimuth - ang2 = np.ones((MAXNST,))*90.0 #dip - ang3 = 
np.zeros((MAXNST,)) #plenge - anis1 = np.zeros((MAXNST,)) - anis2 = np.zeros((MAXNST,)) - - for i in range(nst): - anis1[i] = hmin[i]/max(hmaj[i],EPSLON) - anis2[i] = hvert[i]/max(hmaj[i],EPSLON) - rotmat = setrot3(ang1[i],ang2[i],ang3[i],anis1[i],anis2[i],i,rotmat) - cmax,cov = cova3(0.0,0.0,0.0,0.0,0.0,0.0,nst,c0,it,cc,hmaj,rotmat,cmax) - ##Discretization parameters - xsz = xsiz/nx - xmn = xsz/2.0 - xzero = xsz * 0.0001 - ysz = ysiz/ny - ymn = ysz/2.0 - yzero = ysz * 0.0001 - zsz = zsiz/nz - zmn = zsz/2.0 - zzero = zsz * 0.0001 - ##Calculate Gammabar - gb = 0.0 - for ix in range(nx): - xxi = xmn +(ix-1)*xsz+xzero - for jx in range(nx): - xxj = xmn +(jx-1)*xsz - for iy in range(ny): - yyi = ymn +(iy-1)*ysz+yzero - for jy in range(ny): - yyj = ymn +(jy-1)*ysz - for iz in range(nz): - zzi = zmn +(iz-1)*zsz+zzero - for jz in range(nz): - zzj = zmn +(jz-1)*zsz - cmax,cov = cova3(xxi,yyi,zzi,xxj,yyj,zzj,nst,c0,it,cc,hmaj,rotmat,cmax) - gb += maxcov-cov - gb = gb/((nx*ny*nz)**2) - return gb - From c8b56b1fdd2f190954293ac85eea778eed1460c0 Mon Sep 17 00:00:00 2001 From: David Fulford Date: Thu, 31 Oct 2019 15:16:11 -0500 Subject: [PATCH 02/10] add parallelization with numba --- .gitignore | 125 ++ geostatspy/geostats.py | 2456 ++++++++++++++++++++++------------------ 2 files changed, 1498 insertions(+), 1083 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f717486 --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 09c2242..02f006d 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -16,6 +16,7 @@ """ import math # for trig functions etc. 
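The commit message above promises parallelization with numba, and the imports below (numba.typed.Dict, the shared jit keyword defaults) set that up; as a point of reference, a minimal sketch of the numba pattern that parallelizes an embarrassingly parallel loop (this is an assumption about the intended pattern, not code from this diff):

    import numpy as np
    from numba import jit, prange

    @jit(nopython=True, parallel=True, cache=True)
    def nearest_neighbor_dist(x, y):
        # Distance from each point to its nearest neighbour; the outer loop
        # is distributed across threads by prange.
        n = x.shape[0]
        out = np.full(n, np.inf)
        for i in prange(n):
            for j in range(n):
                if i != j:
                    d = (x[i] - x[j]) ** 2 + (y[i] - y[j]) ** 2
                    if d < out[i]:
                        out[i] = d
        return np.sqrt(out)

Each prange iteration writes only to its own out[i], so the loop is race-free without locks; that independence is what makes parallel=True safe here.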
+from copy import copy # to not change objects passed by ref from bisect import bisect # for maintaining array elements sorted import numpy as np # for ndarrays @@ -26,7 +27,10 @@ from numba.typed import Dict # typing of dictionaries from statsmodels.stats.weightstats import DescrStatsW -def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +JITKW = dict(nopython=True, cache=True, fastmath=True) + + +def backtr(df, vcol, vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar): """Back transform an entire DataFrame column with a provided transformation table and tail extrapolation. :param df: the source DataFrame :param vcol: the column with the variable to transfrom @@ -41,42 +45,45 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :return: TODO """ - EPSLON=1.0e-20 - nd = len(df); nt = len(vr) # number of data to transform and number of data in table + EPSLON = 1.0e-20 + nd = len(df) + nt = len(vr) # number of data to transform and number of data in table backtr = np.zeros(nd) vrgs = df[vcol].values # Value in the lower tail? 1=linear, 2=power, (3 and 4 are invalid): - for id in range(0,nd): + for id in range(0, nd): if vrgs[id] <= vrg[0]: backtr[id] = vr[0] - cdflo = gcum(vrg[0]) - cdfbt = gcum(vrgs[id]) + cdflo = gcum(vrg[0]) + cdfbt = gcum(vrgs[id]) if ltail == 1: - backtr[id] = powint(0.0,cdflo,zmin,vr[0],cdfbt,1.0) + backtr[id] = powint(0.0, cdflo, zmin, vr[0], cdfbt, 1.0) elif ltail == 2: - cpow = 1.0 / ltpar - backtr[id] = powint(0.0,cdflo,zmin,vr[0],cdfbt,cpow) + cpow = 1.0 / ltpar + backtr[id] = powint(0.0, cdflo, zmin, vr[0], cdfbt, cpow) # Value in the upper tail? 1=linear, 2=power, 4=hyperbolic: elif vrgs[id] >= vrg[nt-1]: backtr[id] = vr[nt-1] - cdfhi = gcum(vrg[nt-1]) - cdfbt = gcum(vrgs[id]) + cdfhi = gcum(vrg[nt-1]) + cdfbt = gcum(vrgs[id]) if utail == 1: - backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) + backtr[id] = powint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, 1.0) elif utail == 2: - cpow = 1.0 / utpar - backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) + cpow = 1.0 / utpar + backtr[id] = powint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, cpow) elif utail == 4: plambda = (vr[nt-1]**utpar)*(1.0-gcum(vrg[nt-1])) backtr[id] = (plambda/(1.0-gcum(vrgs)))**(1.0/utpar) else: # Value within the transformation table: - j = locate(vrg,1,nt,vrgs[id]) - j = max(min((nt-2),j),1) - backtr[id] = powint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs[id],1.0) + j = locate(vrg, 1, nt, vrgs[id]) + j = max(min((nt-2), j), 1) + backtr[id] = powint(vrg[j], vrg[j+1], vr[j], + vr[j+1], vrgs[id], 1.0) return backtr -def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): + +def backtr_value(vrgs, vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar): """Back transform a single value with a provided transformation table and tail extrapolation. :param vrgs: value to transform :param vr: the transformation table, 1D ndarray with the original values @@ -89,38 +96,39 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utpar: upper tail extrapolation parameter :return: TODO """ - EPSLON=1.0e-20 - nt = len(vr) # number of data to transform + EPSLON = 1.0e-20 + nt = len(vr) # number of data to transform # Value in the lower tail? 
1=linear, 2=power, (3 and 4 are invalid): if vrgs <= vrg[0]: backtr = vr[0] - cdflo = gcum(vrg[0]) - cdfbt = gcum(vrgs) + cdflo = gcum(vrg[0]) + cdfbt = gcum(vrgs) if ltail == 1: - backtr = dpowint(0.0,cdflo,zmin,vr[0],cdfbt,1.0) + backtr = dpowint(0.0, cdflo, zmin, vr[0], cdfbt, 1.0) elif ltail == 2: - cpow = 1.0 / ltpar - backtr = dpowint(0.0,cdflo,zmin,vr[0],cdfbt,cpow) + cpow = 1.0 / ltpar + backtr = dpowint(0.0, cdflo, zmin, vr[0], cdfbt, cpow) # Value in the upper tail? 1=linear, 2=power, 4=hyperbolic: elif vrgs >= vrg[nt-1]: backtr = vr[nt-1] - cdfhi = gcum(vrg[nt-1]) - cdfbt = gcum(vrgs) + cdfhi = gcum(vrg[nt-1]) + cdfbt = gcum(vrgs) if utail == 1: - backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) + backtr = dpowint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, 1.0) elif utail == 2: - cpow = 1.0 / utpar - backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) + cpow = 1.0 / utpar + backtr = dpowint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, cpow) elif utail == 4: plambda = (vr[nt-1]**utpar)*(1.0-gcum(vrg[nt-1])) backtr = (plambda/(1.0-gcum(vrgs)))**(1.0/utpar) else: -# Value within the transformation table: - j = dlocate(vrg,1,nt,vrgs) - j = max(min((nt-2),j),1) - backtr = dpowint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs,1.0) + # Value within the transformation table: + j = dlocate(vrg, 1, nt, vrgs) + j = max(min((nt-2), j), 1) + backtr = dpowint(vrg[j], vrg[j+1], vr[j], vr[j+1], vrgs, 1.0) return backtr + def gcum(x): """Calculate the cumulative probability of the standard normal distribution. :param x: the value from the standard normal distribution @@ -130,19 +138,21 @@ def gcum(x): z = x if z < 0: z = -z - t = 1./(1.+ 0.2316419*z) - gcum = t*(0.31938153 + t*(-0.356563782 + t*(1.781477937 + t*(-1.821255978 + t*1.330274429)))) - e2 = 0. + t = 1./(1. + 0.2316419*z) + gcum = t*(0.31938153 + t*(-0.356563782 + t * + (1.781477937 + t*(-1.821255978 + t*1.330274429)))) + e2 = 0. # 6 standard deviations out gets treated as infinity: if z <= 6.: e2 = np.exp(-z*z/2.)*0.3989422803 - gcum = 1.0- e2 * gcum + gcum = 1.0 - e2 * gcum if x >= 0: return gcum gcum = 1.0 - gcum return gcum -def locate(xx,iis,iie,x): + +def locate(xx, iis, iie, x): """Return value `j` such that `x` is between `xx[j]` and `xx[j+1]`, where `xx` is an array of length `n`, and `x` is a given value. `xx` must be monotonic, either increasing or decreasing (GSLIB version). @@ -176,6 +186,7 @@ def locate(xx,iis,iie,x): j = jl return j + def dlocate(xx, iis, iie, x): """Return value `j` such that `x` is between `xx[j]` and `xx[j+1]`, where `xx` is an array of length `n`, and `x` is a given value. `xx` must be @@ -194,7 +205,8 @@ def dlocate(xx, iis, iie, x): j = bisect(array, x) return j -def powint(xlow,xhigh,ylow,yhigh,xval,power): + +def powint(xlow, xhigh, ylow, yhigh, xval, power): """Power-based interpolator :param xlow: x lower interval :param xhigh: x upper interval @@ -204,7 +216,7 @@ def powint(xlow,xhigh,ylow,yhigh,xval,power): :param power: power for interpolation :return: TODO """ - EPSLON=1.0e-20 + EPSLON = 1.0e-20 if (xhigh-xlow) < EPSLON: powint = (yhigh+ylow)/2.0 else: @@ -270,6 +282,7 @@ def dsortem(ib, ie, a, iperm, b=0, c=0, d=0, e=0, f=0, g=0, h=0): h = h_slice[inds] return a, b, c, d, e, f, g, h # TODO: changed from 'a, b, c, d, e, f, h' + def gauinv(p): """Compute the inverse of the standard normal cumulative distribution function. 
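The gcum reformat above keeps a classic polynomial approximation of the standard normal CDF (the Abramowitz and Stegun form, accurate to roughly 1e-7); a quick spot check against scipy, which this module already depends on for scipy.spatial, might look like:

    from geostatspy.geostats import gcum  # assuming the module is importable
    from scipy.stats import norm

    # The approximation and the exact CDF should agree to about 1e-6.
    for x in (-3.0, -1.0, 0.0, 0.5, 2.0):
        assert abs(gcum(x) - norm.cdf(x)) < 1e-6, x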
@@ -366,26 +379,31 @@ def dpowint(xlow, xhigh, ylow, yhigh, xval, pwr): ) return dpowint_ -#@jit(nopython=True) # all NumPy array operations included in this function for precompile with NumBa -def setup_rotmat2(c0,nst,it,cc,ang): - DTOR=3.14159265/180.0; EPSLON=0.000000; PI=3.141593 +# @jit(**JITKW) # all NumPy array operations included in this function for precompile with NumBa + + +def setup_rotmat2(c0, nst, it, cc, ang): + DTOR = 3.14159265/180.0 + EPSLON = 0.000000 + PI = 3.141593 # The first time around, re-initialize the cosine matrix for the # variogram structures: - rotmat = np.zeros((4,nst)) + rotmat = np.zeros((4, nst)) maxcov = c0 - for js in range(0,nst): + for js in range(0, nst): azmuth = (90.0-ang[js])*DTOR - rotmat[0,js] = math.cos(azmuth) - rotmat[1,js] = math.sin(azmuth) - rotmat[2,js] = -1*math.sin(azmuth) - rotmat[3,js] = math.cos(azmuth) + rotmat[0, js] = math.cos(azmuth) + rotmat[1, js] = math.sin(azmuth) + rotmat[2, js] = -1*math.sin(azmuth) + rotmat[3, js] = math.cos(azmuth) if it[js] == 4: maxcov = maxcov + 9999.9 else: maxcov = maxcov + cc[js] return rotmat, maxcov -@jit(nopython=True) + +@jit(**JITKW) def setup_rotmat(c0, nst, it, cc, ang, pmx): """Setup rotation matrix. :param c0: nugget constant (isotropic) @@ -416,7 +434,7 @@ def setup_rotmat(c0, nst, it, cc, ang, pmx): return rotmat, maxcov -@jit(nopython=True) +@jit(**JITKW) def cova2(x1, y1, x2, y2, nst, c0, pmx, cc, aa, it, ang, anis, rotmat, maxcov): """Calculate the covariance associated with a variogram model specified by a nugget effect and nested variogram structures. @@ -471,18 +489,20 @@ def cova2(x1, y1, x2, y2, nst, c0, pmx, cc, aa, it, ang, anis, rotmat, maxcov): cova2_ = cova2_ + cov1 return cova2_ -def sqdist(x1,y1,z1,x2,y2,z2,ind,rotmat): -# Compute component distance vectors and the squared distance: + +def sqdist(x1, y1, z1, x2, y2, z2, ind, rotmat): + # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 dz = 0.0 sqdist = 0.0 - for i in range(0,2): - cont = rotmat[ind,i,1] * dx + rotmat[ind,i,2] * dy + for i in range(0, 2): + cont = rotmat[ind, i, 1] * dx + rotmat[ind, i, 2] * dy sqdist = sqdist + cont * cont return sqdist -def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): + +def sqdist2(x1, y1, x2, y2, ist, rotmat, anis): """Calculate the 2D square distance based on geometric ani :param x1: x coordinate of first point :param y1: y coordinate of first point @@ -497,65 +517,68 @@ def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 - dx1 = (dx*rotmat[0,ist] + dy*rotmat[1,ist]) - dy1 = (dx*rotmat[2,ist] + dy*rotmat[3,ist])/anis[ist] + dx1 = (dx*rotmat[0, ist] + dy*rotmat[1, ist]) + dy1 = (dx*rotmat[2, ist] + dy*rotmat[3, ist])/anis[ist] sqdist_ = (dx1*dx1+dy1*dy1) return sqdist_ -def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): + +def setrot(ang1, ang2, sang1, anis1, anis2, sanis1, nst, MAXROT): """GSLIB's SETROT subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. 
""" - DEG2RAD = 3.141592654/180.0; EPSLON=1.e-20 - rotmat = np.zeros((MAXROT+1,3,3)) + DEG2RAD = 3.141592654/180.0 + EPSLON = 1.e-20 + rotmat = np.zeros((MAXROT+1, 3, 3)) if ang1 >= 0.0 and ang1 < 270.0: - alpha = (90.0 - ang1) * DEG2RAD + alpha = (90.0 - ang1) * DEG2RAD else: - alpha = (450.0 - ang1) * DEG2RAD + alpha = (450.0 - ang1) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac1 = 1.0 / (max(anis1,EPSLON)) - rotmat[0,1,1] = cosa - rotmat[0,1,2] = sina - rotmat[0,2,1] = afac1*(-sina) - rotmat[0,2,2] = afac1*(cosa) + afac1 = 1.0 / (max(anis1, EPSLON)) + rotmat[0, 1, 1] = cosa + rotmat[0, 1, 2] = sina + rotmat[0, 2, 1] = afac1*(-sina) + rotmat[0, 2, 2] = afac1*(cosa) # 2nd structure if present if nst > 1: if ang2 >= 0.0 and ang2 < 270.0: - alpha = (90.0 - ang2) * DEG2RAD + alpha = (90.0 - ang2) * DEG2RAD else: - alpha = (450.0 - ang2) * DEG2RAD + alpha = (450.0 - ang2) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac2 = 1.0 / (max(anis2,EPSLON)) - rotmat[1,1,1] = cosa - rotmat[1,1,2] = sina - rotmat[1,2,1] = afac1*(-sina) - rotmat[1,2,2] = afac1*(cosa) + afac2 = 1.0 / (max(anis2, EPSLON)) + rotmat[1, 1, 1] = cosa + rotmat[1, 1, 2] = sina + rotmat[1, 2, 1] = afac1*(-sina) + rotmat[1, 2, 2] = afac1*(cosa) # search rotation if sang1 >= 0.0 and sang1 < 270.0: - alpha = (90.0 - sang1) * DEG2RAD + alpha = (90.0 - sang1) * DEG2RAD else: - alpha = (450.0 - sang1) * DEG2RAD + alpha = (450.0 - sang1) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac1 = 1.0 / (max(sanis1,EPSLON)) - rotmat[MAXROT,1,1] = cosa - rotmat[MAXROT,1,2] = sina - rotmat[MAXROT,2,1] = afac1*(-sina) - rotmat[MAXROT,2,2] = afac1*(cosa) + afac1 = 1.0 / (max(sanis1, EPSLON)) + rotmat[MAXROT, 1, 1] = cosa + rotmat[MAXROT, 1, 2] = sina + rotmat[MAXROT, 2, 1] = afac1*(-sina) + rotmat[MAXROT, 2, 2] = afac1*(cosa) # Return to calling program: return rotmat -@jit(nopython=True) + +@jit(**JITKW) def ksol_numpy(neq, a, r): """Find solution of a system of linear equations. :param neq: number of equations @@ -571,7 +594,8 @@ def ksol_numpy(neq, a, r): s = ainv @ r # matrix multiplication return s -def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,global_rotmat,radsqd): + +def ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MAXXYZ, xsiz, ysiz, isrot, nx, ny, nst, c0, cc, aa, it, ang, anis, global_rotmat, radsqd): """GSLIB's CTABLE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). 
@@ -580,46 +604,48 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Declare constants TINY = 1.0e-10 PMX = 9999.9 - MAXROT=2 + MAXROT = 2 # Size of the look-up table: tmp = np.zeros(MAXXYZ) MAXORD = MAXXYZ - if (nx*ny) < MAXCXY: + if (nx*ny) < MAXCXY: MAXORD = MAXCXY order = np.zeros(MAXORD) - nctx = int(min(((MAXCTX-1)/2),(nx-1))) - ncty = int(min(((MAXCTY-1)/2),(ny-1))) + nctx = int(min(((MAXCTX-1)/2), (nx-1))) + ncty = int(min(((MAXCTY-1)/2), (ny-1))) # print('CTable check') # print('nctx ' + str(nctx) + ', ncty ' + str(ncty)) ixnode = np.zeros(MAXXYZ) iynode = np.zeros(MAXXYZ) - covtab = np.zeros((MAXCTX,MAXCTY)) + covtab = np.zeros((MAXCTX, MAXCTY)) # Initialize the covariance subroutine and cbb at the same time: - rotmat, maxcov = setup_rotmat2(c0,nst,it,cc,ang) - cbb = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat2(c0, nst, it, cc, ang) + cbb = cova2(0.0, 0.0, 0.0, 0.0, nst, c0, PMX, cc, + aa, it, ang, anis, rotmat, maxcov) # Now, set up the table and keep track of the node offsets that are # within the search radius: - nlooku = -1 # adjusted for 0 origin - for i in range(-nctx,nctx+1): # cover entire range + nlooku = -1 # adjusted for 0 origin + for i in range(-nctx, nctx+1): # cover entire range xx = i * xsiz ic = nctx + i - for j in range(-ncty,ncty+1): # cover entire range + for j in range(-ncty, ncty+1): # cover entire range yy = j * ysiz jc = ncty + j - covtab[ic,jc] = cova2(0.0,0.0,xx,yy,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + covtab[ic, jc] = cova2( + 0.0, 0.0, xx, yy, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) # print('cov table offset'); print(xx,yy); print(covtab[ic,jc]) - hsqd = sqdist(0.0,0.0,0.0,xx,yy,0.0,MAXROT,global_rotmat) + hsqd = sqdist(0.0, 0.0, 0.0, xx, yy, 0.0, MAXROT, global_rotmat) if hsqd <= radsqd: nlooku = nlooku + 1 # We want to search by closest variogram distance (and use the # anisotropic Euclidean distance to break ties: - tmp[nlooku] = - (covtab[ic,jc] - TINY*hsqd) + tmp[nlooku] = - (covtab[ic, jc] - TINY*hsqd) order[nlooku] = (jc)*MAXCTX+ic # print('populated presort'); print(tmp,order) # Finished setting up the look-up table, now order the nodes such @@ -629,19 +655,20 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa nlooku = nlooku + 1 # print('nlooku' + str(nlooku)); print('MAXCTX' + str(MAXCTX)) - tmp, order = dsortem(0,nlooku,tmp,2,b=order) + tmp, order = dsortem(0, nlooku, tmp, 2, b=order) # print('populated postsort'); print(tmp,order) - for il in range(0,nlooku): + for il in range(0, nlooku): loc = int(order[il]) - iy = int((loc-0)/MAXCTX) - ix = loc - (iy-0)*MAXCTX + iy = int((loc-0)/MAXCTX) + ix = loc - (iy-0)*MAXCTX iynode[il] = int(iy) ixnode[il] = int(ix) # print('populated ix, iy node list'); print(ixnode, iynode) - return covtab,tmp,order,ixnode,iynode,nlooku,nctx,ncty + return covtab, tmp, order, ixnode, iynode, nlooku, nctx, ncty + -def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nctx,ncty,UNEST): +def srchnd(ix, iy, nx, ny, xmn, ymn, xsiz, ysiz, sim, noct, nodmax, ixnode, iynode, nlooku, nctx, ncty, UNEST): """GSLIB's SRCHND subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). 
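The ordering trick in ctable above is the heart of the spiral search that srchnd performs below: every node offset inside the search radius is sorted by decreasing covariance with the origin (anisotropic distance breaks ties), so the search can walk the list front to back and stop after nodmax hits. A minimal numpy sketch of the same idea (illustrative names, not the module's API):

    import numpy as np

    def spiral_order(offsets, cov, hsqd, radsqd, tiny=1.0e-10):
        # offsets: (n, 2) integer grid offsets; cov and hsqd: covariance and
        # anisotropic squared distance of each offset to the origin.
        inside = hsqd <= radsqd                     # keep offsets in the radius
        key = -(cov[inside] - tiny * hsqd[inside])  # most informative first
        return offsets[inside][np.argsort(key, kind="stable")]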
@@ -649,25 +676,31 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc """ # Consider all the nearby nodes until enough have been found: - ncnode = 0; - icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) - cnodev = np.zeros(nodmax);cnodex = np.zeros(nodmax); cnodey = np.zeros(nodmax); + ncnode = 0 + icnode = np.zeros(nodmax, dtype=int) + icnode.fill(-1) + cnodev = np.zeros(nodmax) + cnodex = np.zeros(nodmax) + cnodey = np.zeros(nodmax) # print('Node search at '); print(ix,iy) # print('nlooku'); print(nlooku) if noct > 0: ninoct = np.zeros(8) - for il in range(0,nlooku): - if ncnode == nodmax: return ncnode, icnode, cnodev, cnodex, cnodey + for il in range(0, nlooku): + if ncnode == nodmax: + return ncnode, icnode, cnodev, cnodex, cnodey i = ix + (int(ixnode[il])-nctx) j = iy + (int(iynode[il])-ncty) # print('i,j'); print(i,j) - if i < 0 or j < 0: continue - if i >= nx or j >= ny: continue + if i < 0 or j < 0: + continue + if i >= nx or j >= ny: + continue ind = i + (j)*nx if sim[ind] > UNEST: icnode[ncnode] = il - cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin + cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin cnodey[ncnode] = ymn + (j)*ysiz cnodev[ncnode] = sim[ind] # print('srchnd found at index - ' +str(ind) + ' at x and y ' + str(cnodex[ncnode]) + ',' + str(cnodey[ncnode])) @@ -676,16 +709,18 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc ncnode = ncnode + 1 # moved to account for origin 0 return ncnode, icnode, cnodev, cnodex, cnodey -def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval): + +def beyond(ivtype, nccut, ccut, ccdf, ncut, cut, cdf, zmin, zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval): """GSLIB's BEYOND subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. """ - EPSLON = 1.0e-20; UNEST=-1.0 + EPSLON = 1.0e-20 + UNEST = -1.0 # Check for both "zval" and "cdfval" defined or undefined: - ierr = 1; + ierr = 1 if zval > UNEST and cdfval > UNEST: return -1 if zval <= UNEST and cdfval <= UNEST: @@ -694,7 +729,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Handle the case of a categorical variable: if ivtype == 0: cum = 0 - for i in range(0,nccut): + for i in range(0, nccut): cum = cum + ccdf[i] if cdfval <= cum: zval = ccut[i] @@ -704,7 +739,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Figure out what part of distribution: ipart = 0 - lower tail # ipart = 1 - middle # ipart = 2 - upper tail - ierr = 0 + ierr = 0 ipart = 1 if zval > UNEST: if zval <= ccut[0]: @@ -720,122 +755,136 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # ARE WE IN THE LOWER TAIL? if ipart == 0: - if ltail ==1: -# Straight Linear Interpolation: + if ltail == 1: + # Straight Linear Interpolation: powr = 1.0 if zval > UNEST: - cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,powr) + cdfval = powint(zmin, ccut[0], 0.0, ccdf[0], zval, powr) else: - zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) + zval = powint(0.0, ccdf[0], zmin, ccut[0], cdfval, powr) elif ltail == 2: -# Power Model interpolation to lower limit "zmin"?
- if zval > UNEST: - cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,ltpar) - else: - powr = 1.0 / ltpar - zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) + # Power Model interpolation to lower limit "zmin"? + if zval > UNEST: + cdfval = powint(zmin, ccut[0], 0.0, ccdf[0], zval, ltpar) + else: + powr = 1.0 / ltpar + zval = powint(0.0, ccdf[0], zmin, ccut[0], cdfval, powr) # Linear interpolation between the rescaled global cdf? elif ltail == 3: if zval > UNEST: -# Computing the cdf value. Locate the point and the class bound: - idat = locate(cut,1,ncut,zval) - iupp = locate(cut,ncut,1,ncut,ccut[0]) + # Computing the cdf value. Locate the point and the class bound: + idat = locate(cut, 1, ncut, zval) + iupp = locate(cut, ncut, 1, ncut, ccut[0]) # Straight linear interpolation if no data; otherwise, linear: - if idat <= -1 or idat >= ncut -1 or iupp <= -1 or iupp >= ncut-1: # modfity for 0 index - cdfval = powint(zmin,cut[0],0.0,cdf[0],zval,1.) + if idat <= -1 or idat >= ncut - 1 or iupp <= -1 or iupp >= ncut-1: # modified for 0 index + cdfval = powint(zmin, cut[0], 0.0, cdf[0], zval, 1.) else: - temp = powint(cut[idat],cut[idat+1],cdf[idat],cdf[idat+1],zval,1.) + temp = powint(cut[idat], cut[idat+1], + cdf[idat], cdf[idat+1], zval, 1.) cdfval = temp*ccdf[0]/cdf[iupp] else: -# Computing Z value: Are there any data out in the tail? + # Computing Z value: Are there any data out in the tail? - iupp = locate(cut,ncut,1,ncut,ccut[0]) + iupp = locate(cut, ncut, 1, ncut, ccut[0]) # Straight linear interpolation if no data; otherwise, local linear # interpolation: if iupp <= 0 or iupp >= ncut: - zval = powint(0.0,cdf[0],zmin,cut[0],cdfval,1.) + zval = powint(0.0, cdf[0], zmin, cut[0], cdfval, 1.) else: temp = cdfval*cdf[iupp]/ccdf[1] - idat = locate(cdf,ncut,1,ncut,temp) + idat = locate(cdf, ncut, 1, ncut, temp) if idat <= -1 or idat >= ncut-1: # adjusted for 0 origin - zval = powint(0.0,cdf[0],zmin,cut[0],cdfval,1.) + zval = powint(0.0, cdf[0], zmin, cut[0], cdfval, 1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[dat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) else: -# Error situation - unacceptable option: - ierr = 2 - return -1 + # Error situation - unacceptable option: + ierr = 2 + return -1 # FINISHED THE LOWER TAIL, ARE WE IN THE MIDDLE? if ipart == 1: -# Establish the lower and upper limits: + # Establish the lower and upper limits: if zval > UNEST: - cclow = locate(ccut,1,nccut,zval) + cclow = locate(ccut, 1, nccut, zval) else: - cclow = locate(ccdf,1,nccut,cdfval) + cclow = locate(ccdf, 1, nccut, cdfval) cchigh = cclow + 1 if middle == 1: -# Straight Linear Interpolation: + # Straight Linear Interpolation: powr = 1.0 if zval > UNEST: - cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,powr) + cdfval = powint(ccut[cclow], ccut[cchigh], + ccdf[cclow], ccdf[cchigh], zval, powr) else: - zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) + zval = powint(ccdf[cclow], ccdf[cchigh], + ccut[cclow], ccut[cchigh], cdfval, powr) # Power interpolation between class bounds?
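All of these tail and class branches bottom out in GSLIB's powint, whose opening lines appear earlier in this diff; completing the definition for reference (the else branch below is reconstructed from the GSLIB original, so treat it as a sketch rather than this module's exact code):

    def powint(xlow, xhigh, ylow, yhigh, xval, power):
        EPSLON = 1.0e-20
        if (xhigh - xlow) < EPSLON:
            return (yhigh + ylow) / 2.0  # degenerate interval: take the midpoint
        # relative position of xval, warped by the power, mapped onto [ylow, yhigh]
        frac = (xval - xlow) / (xhigh - xlow)
        return ylow + (yhigh - ylow) * frac ** power

With power = 1.0 this is ordinary linear interpolation, which is why the ltail == 1 and middle == 1 branches simply pass powr = 1.0.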
elif middle == 2: - if zval > UNEST: - cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,mpar) - else: - powr = 1.0 / mpar - zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) + if zval > UNEST: + cdfval = powint(ccut[cclow], ccut[cchigh], + ccdf[cclow], ccdf[cchigh], zval, mpar) + else: + powr = 1.0 / mpar + zval = powint(ccdf[cclow], ccdf[cchigh], + ccut[cclow], ccut[cchigh], cdfval, powr) # Linear interpolation between the rescaled global cdf? elif middle == 3: - ilow = locate(cut,ncut,1,ncut,ccut[cclow]) - iupp = locate(cut,ncut,1,ncut,ccut[cchigh]) + ilow = locate(cut, ncut, 1, ncut, ccut[cclow]) + iupp = locate(cut, ncut, 1, ncut, ccut[cchigh]) if cut[ilow] < ccut[cclow]: ilow = ilow + 1 - if cut[iupp] > ccut[cchigh]: + if cut[iupp] > ccut[cchigh]: iupp = iupp - 1 if zval > UNEST: - idat = locate(cut,1,ncut,zval) + idat = locate(cut, 1, ncut, zval) # Straight linear interpolation if no data; otherwise, local linear # interpolation: if idat <= -1 or idat >= ncut-1 or ilow <= -1 or ilow >= ncut-1 or iupp <= -1 or iupp >= ncut-1 or iupp <= ilow: - cdfval=powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,1.) + cdfval = powint(ccut[cclow], ccut[cchigh], + ccdf[cclow], ccdf[cchigh], zval, 1.) else: - temp = powint(cut[idat],cut[idat+1],cdf[idat],cdf[idat+1],zval,1.) - cdfval=powint(cdf[ilow],cdf[iupp],ccdf[cclow],ccdf[cchigh],temp,1.) + temp = powint(cut[idat], cut[idat+1], + cdf[idat], cdf[idat+1], zval, 1.) + cdfval = powint(cdf[ilow], cdf[iupp], + ccdf[cclow], ccdf[cchigh], temp, 1.) else: -# Straight linear interpolation if no data; otherwise, local linear -# interpolation: + # Straight linear interpolation if no data; otherwise, local linear + # interpolation: if ilow <= -1 or ilow >= ncut-1 or iup <= -1 or iupp >= ncut-1 or iupp < ilow: - zval=powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.) + zval = powint(ccdf[cclow], ccdf[cchigh], + ccut[cclow], ccut[cchigh], cdfval, 1.) else: - temp=powint(ccdf[cclow],ccdf[cchigh],cdf[ilow],cdf[iupp],cdfval,1.) - idat = locate(cdf,1,ncut,temp) + temp = powint(ccdf[cclow], ccdf[cchigh], + cdf[ilow], cdf[iupp], cdfval, 1.) + idat = locate(cdf, 1, ncut, temp) if cut[idat] < ccut[cclow]: - idat=idat+1 + idat = idat+1 if idat <= -1 or idat >= ncut-1 or cut[idat+1] > ccut[cchigh]: - zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.) + zval = powint(ccdf[cclow], ccdf[cchigh], + ccut[cclow], ccut[cchigh], cdfval, 1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) - zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) else: -# Error situation - unacceptable option: + # Error situation - unacceptable option: ierr = 2 return -1 @@ -844,25 +893,29 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar if utail == 1: powr = 1.0 if zval > UNEST: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,powr) + cdfval = powint(ccut(nccut), zmax, + ccdf(nccut), 1.0, zval, powr) else: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, powr) elif utail == 2: -# Power interpolation to upper limit "utpar"? + # Power interpolation to upper limit "utpar"? 
if zval > UNEST: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,utpar) + cdfval = powint(ccut(nccut), zmax, ccdf( + nccut), 1.0, zval, utpar) else: powr = 1.0 / utpar - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, powr) # Linear interpolation between the rescaled global cdf? elif utail == 3: if zval > UNEST: -# Approximately Locate the point and the class bound: - idat = locate(cut,1,ncut,zval,idat) - ilow = locate(cut,1,ncut,ccut(nccut),ilow) + # Approximately Locate the point and the class bound: + idat = locate(cut, 1, ncut, zval, idat) + ilow = locate(cut, 1, ncut, ccut(nccut), ilow) if cut[idat] < zval: idat = idat + 1 if cut[ilow] < ccut[nccut-1]: @@ -871,48 +924,52 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Straight linear interpolation if no data; otherwise, local linear # interpolation: if idat < -1 or idat >= ncut-1 or ilow <= -1 or ilow >= ncut-1: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,1.) + cdfval = powint(ccut(nccut), zmax, + ccdf(nccut), 1.0, zval, 1.) else: - temp = powint(cut(idat),cut(idat+1),cdf(idat),cdf(idat+1),zval,1.) - cdfval = powint(cdf(ilow),1.0,ccdf(nccut),1.0,temp,1.) + temp = powint(cut(idat), cut(idat+1), + cdf(idat), cdf(idat+1), zval, 1.) + cdfval = powint(cdf(ilow), 1.0, ccdf(nccut), 1.0, temp, 1.) else: -# Computing Z value: Are there any data out in the tail? - ilow = locate(cut,ncut,1,ncut,ccut(nccut),ilow) + # Computing Z value: Are there any data out in the tail? + ilow = locate(cut, ncut, 1, ncut, ccut(nccut), ilow) if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear # interpolation: if ilow <= -1 or ilow >= ncut-1: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,1.) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, 1.) else: - temp = powint(ccdf(nccut),1.0,cdf(ilow),1.0,cdfval,1.) - idat = locate(cdf,ncut,1,ncut,temp) + temp = powint(ccdf(nccut), 1.0, cdf(ilow), 1.0, cdfval, 1.) + idat = locate(cdf, ncut, 1, ncut, temp) if cut[idat] < ccut[nccut-1]: - idat=idat+1 + idat = idat+1 if idat >= ncut-1: - zval = powint(ccdf[nccut-1],1.0,ccut[nccut-1],zmax,cdfval,1.) + zval = powint(ccdf[nccut-1], 1.0, + ccut[nccut-1], zmax, cdfval, 1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) # Fit a Hyperbolic Distribution? 
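The elif utail == 4 branch just below fits the hyperbolic upper tail: with lambda = ccut[-1]**utpar * (1 - ccdf[-1]) pinned to the last threshold, the tail cdf is F(z) = 1 - lambda/z**utpar and the quantile is z = (lambda/(1 - F))**(1/utpar). A standalone sketch of that pair (note the quantile here raises the whole quotient to 1/utpar, the algebraically consistent inverse of the cdf):

    def hyper_tail_cdf(z, zlast, cdflast, utpar):
        # cdf beyond the last threshold under the hyperbolic model
        lambd = zlast ** utpar * (1.0 - cdflast)
        return 1.0 - lambd / z ** utpar

    def hyper_tail_quantile(cdfval, zlast, cdflast, utpar):
        # inverse of hyper_tail_cdf
        lambd = zlast ** utpar * (1.0 - cdflast)
        return (lambd / (1.0 - cdfval)) ** (1.0 / utpar)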
elif utail == 4: -# Figure out "lambda" and required info: - lambd = math.pow(ccut[nccut],utpar)*(1.0-ccdf[nccut-1]) + # Figure out "lambda" and required info: + lambd = math.pow(ccut[nccut], utpar)*(1.0-ccdf[nccut-1]) if zval > UNEST: - cdfval = 1.0 - (lambd/(math.pow(zval,utpar))) + cdfval = 1.0 - (lambd/(math.pow(zval, utpar))) else: - zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) + zval = (lambd/math.pow((1.0-cdfval), (1.0/utpar))) else: -# Error situation - unacceptable option: + # Error situation - unacceptable option: ierr = 2 return -1 - if zval < zmin: zval = zmin if zval > zmax: @@ -922,14 +979,15 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar return zval -def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,ncty,icnode,ixnode,iynode,cnodev,cnodex,cnodey,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov,MAXCTX,MAXCTY,MAXKR1,MAXKR2): + +def krige(ix, iy, nx, ny, xx, yy, lktype, x, y, vr, sec, colocorr, lvm, close, covtab, nctx, ncty, icnode, ixnode, iynode, cnodev, cnodex, cnodey, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov, MAXCTX, MAXCTY, MAXKR1, MAXKR2): """GSLIB's KRIGE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. """ EPSLON = 1.0e-20 - cur_index = ix + (iy)*nx + cur_index = ix + (iy)*nx # print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) @@ -940,9 +998,14 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) - vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) - r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + vra = np.zeros(MAXKR1) + vrea = np.zeros(MAXKR1) + r = np.zeros(MAXKR1) + rr = np.zeros(MAXKR1) + s = np.zeros(MAXKR1) + a = np.zeros(MAXKR2) + cbb = cova2(0, 0, 0, 0, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print(r.shape) # Local mean if lktype == 2: @@ -952,84 +1015,91 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # Size of the kriging system: first = False - na = nclose + ncnode + na = nclose + ncnode # print('lktype' + str(lktype)) - if lktype == 0: neq = na + if lktype == 0: + neq = na if lktype == 1: -# print('ordinary kriging') + # print('ordinary kriging') + neq = na + 1 + if lktype == 2: + neq = na + if lktype == 3: + neq = na + 2 + if lktype == 4: neq = na + 1 - if lktype == 2: neq = na - if lktype == 3: neq = na + 2 - if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) # Set up kriging matrices: - iin=-1 # acocunting for 0 origin + iin = -1 # acocunting for 0 origin # print('krige na' + str(na)) - for j in range(0,na): + for j in range(0, na): -# Sort out the actual location of point "j" - if j < nclose: # adjusted for 0 index origin - index = int(close[j]) - x1 = x[index] - y1 = y[index] + # Sort out the actual location of point "j" + if j < nclose: # adjusted for 0 index origin + index = int(close[j]) + x1 = x[index] + y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) if sec.shape[0] > 1: - vrea[j]= sec[index]; + vrea[j] = sec[index] else: - 
vrea[j] = 0.0 # added this - no effect - if lktype == 2: vra[j] = vra[j] - vrea[j] + vrea[j] = 0.0 # added this - no effect + if lktype == 2: + vra[j] = vra[j] - vrea[j] else: -# It is a previously simulated node (keep index for table look-up): -# print(j) - index = j-(nclose) # adjust for 0 index - x1 = cnodex[index] - y1 = cnodey[index] + # It is a previously simulated node (keep index for table look-up): + # print(j) + index = j-(nclose) # adjust for 0 index + x1 = cnodex[index] + y1 = cnodey[index] vra[j] = cnodev[index] - ind = icnode[index] + ind = icnode[index] # print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - ix1 = ix + (int(ixnode[ind])-nctx-1) - iy1 = iy + (int(iynode[ind])-ncty-1) + ix1 = ix + (int(ixnode[ind])-nctx-1) + iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) - index = ix1 + (iy1-1)*nx + index = ix1 + (iy1-1)*nx if lktype == 2: - vrea[j]= lvm[index] + vrea[j] = lvm[index] vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix -# print('kriging indice populated' + str(j) + ',' + str(i)) -# Sort out the actual location of point "i" + for i in range(0, na): # we need the full matrix + # print('kriging indice populated' + str(j) + ',' + str(i)) + # Sort out the actual location of point "i" if i < nclose: - index = int(close[i]) # adjust for 0 index - x2 = x[index] - y2 = y[index] + index = int(close[i]) # adjust for 0 index + x2 = x[index] + y2 = y[index] else: -# It is a previously simulated node (keep index for table look-up): + # It is a previously simulated node (keep index for table look-up): #print('i = ' + str(i) + ',nclose = ' + str(nclose) + ', na = ' + str(na)) - index = i-(nclose) - x2 = cnodex[index] - y2 = cnodey[index] - ind = icnode[index] + index = i-(nclose) + x2 = cnodex[index] + y2 = cnodey[index] + ind = icnode[index] # print('previous node index' + str(ind)) - ix2 = ix + (int(ixnode[ind])-nctx-1) - iy2 = iy + (int(iynode[ind])-ncty-1) + ix2 = ix + (int(ixnode[ind])-nctx-1) + iy2 = iy + (int(iynode[ind])-ncty-1) # Now, get the covariance value: iin = iin + 1 # print('kriging data location = '); print(x2,y2) # Decide whether or not to use the covariance look-up table: if j <= nclose or i <= nclose: - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) a[iin] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix1 - ix2) -# jj = ncty + 1 + (iy1 - iy2) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix1 - ix2) + # jj = ncty + 1 + (iy1 - iy2) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: @@ -1039,8 +1109,9 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # Get the RHS value (possibly with covariance look-up table): if j <= nclose: -# print(cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print(cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) @@ -1048,19 
+1119,20 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct r[j] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix - ix1) -# jj = ncty + 1 + (iy - iy1) - -# print('RHS ctable coord' + str(ii) + ',' + str(jj)) -# print('ix,iy ='); print(ix,iy) -# print('ix1,iy1'); print(ix1,iy1) -# if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 -# print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) -# else: -# cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix - ix1) + # jj = ncty + 1 + (iy - iy1) + + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) + # print('ix,iy ='); print(ix,iy) + # print('ix1,iy1'); print(ix1,iy1) + # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 + # print('Not using covariance table') + # cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # else: + # cov = covtab[ii,jj] + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) @@ -1069,54 +1141,59 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] - if lktype == 1: # we need the full array + if lktype == 1: # we need the full array iin = iin + 1 a[iin] = 1.0 - if lktype == 4: # we need the full array + if lktype == 4: # we need the full array iin = iin + 1 a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = 1.0 - iin = iin + 1 - a[iin] = 0.0 - r[na] = 1.0 + iin = iin + 1 + a[iin] = 0.0 + r[na] = 1.0 rr[na] = 1.0 # Addition of the External Drift Constraint: if lktype == 3: - edmin = 999999. + edmin = 999999. edmax = -999999. 
-        for i in range(0,na):
-            iin = iin + 1
-            a[iin] = vrea(i)
-            if a[iin] < edmin: edmin = a[iin]
-            if a[iin] > edmax: edmax = a[iin]
-        iin = iin + 1
-        a[iin] = 0.0
-        iin = iin + 1
-        a[iin] = 0.0
-        ind = ix + (iy-1)*nx
-        r[na+1] = lvm[ind]
+        edmin = 999999.
+        for i in range(0, na):
+            iin = iin + 1
+            a[iin] = vrea[i]  # index with brackets: vrea is an ndarray, not a callable
+            if a[iin] < edmin:
+                edmin = a[iin]
+            if a[iin] > edmax:
+                edmax = a[iin]
+        iin = iin + 1
+        a[iin] = 0.0
+        iin = iin + 1
+        a[iin] = 0.0
+        ind = ix + (iy-1)*nx
+        r[na+1] = lvm[ind]
         rr[na+1] = r[na+1]
-        if (edmax-edmin) < EPSLON: neq = neq - 1
+        if (edmax-edmin) < EPSLON:
+            neq = neq - 1

 # Addition of Collocated Cosimulation Constraint:
     if lktype == 4:
         colc = True
-        sfmin = 1.0e21
         sfmax = -1.0e21
-        for i in range(0,na):
-            iin = iin + 1
+        sfmin = 1.0e21
+        for i in range(0, na):
+            iin = iin + 1
             a[iin] = colocorr*r[i]
-            if a[iin] < sfmin: sfmin = a[iin]
-            if a[iin] > sfmax: sfmax = a[iin]
-        iin = iin + 1
+            if a[iin] < sfmin:
+                sfmin = a[iin]
+            if a[iin] > sfmax:
+                sfmax = a[iin]
+        iin = iin + 1
         a[iin] = 1.0
-        ii = na
-        r[ii] = colocorr
+        ii = na
+        r[ii] = colocorr
         rr[ii] = r[ii]
 # if (sfmax-sfmin) < EPSLON:
 #     neq = neq - 1
@@ -1128,18 +1205,18 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct
 # print('r'); print(r)
 # print('data'); print(vra)
     if neq == 1 and lktype != 3:
-#        print('neq = 1 '); print(a,r)
-        s[0] = r[0] / a[0]
+        # print('neq = 1 '); print(a,r)
+        s[0] = r[0] / a[0]
     else:
-#        print('neq prior ksol' + str(neq))
-        s = ksol_numpy(neq,a,r)
+        # print('neq prior ksol' + str(neq))
+        s = ksol_numpy(neq, a, r)
 # print('neq post ksol' + str(neq))
 # if s.shape[0]< neq:
 #     print('s shape'); print(s.shape)
 #     print('a'); print(a)
 #     print('r'); print(r)
-    ising = 0 # need to figure this out
+    ising = 0  # need to figure this out
 # print('s'); print(s)

 # Compute the estimate and kriging variance.
Recall that kriging type @@ -1150,44 +1227,46 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # 4 = Collocated Cosimulation: # print('kriging weights'); print(s) - cmean = 0.0 + cmean = 0.0 # print('cbb = ' + str(cbb)) cstdev = cbb sumwts = 0.0 - for i in range(0,na): - cmean = cmean + s[i]*vra[i] + for i in range(0, na): + cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) - if lktype == 2: cmean = cmean + gmean - if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion - ind = ix + (iy-1)*nx + if lktype == 2: + cmean = cmean + gmean + if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion + ind = ix + (iy-1)*nx # print(ind) # print('neq'); print(neq) # print('s'); print(s.shape) # print('lvm'); print(lvm.shape) # print('colc wt = ' + str(s[na]) + ' for ' + str(lvm[cur_index]) + ' at index ' + str(cur_index)) - cmean = cmean + s[na]*lvm[cur_index] - cstdev = cstdev - s[na] *rr[na] + cmean = cmean + s[na]*lvm[cur_index] + cstdev = cstdev - s[na] * rr[na] # Error message if negative variance: if cstdev < 0.0: -# print('ERROR: Negative Variance: ' + str(cstdev)) + # print('ERROR: Negative Variance: ' + str(cstdev)) cstdev = 0.0 - cstdev = math.sqrt(max(cstdev,0.0)) + cstdev = math.sqrt(max(cstdev, 0.0)) # print('kriging estimate and variance' + str(cmean) + ', ' + str(cstdev)) return cmean, cstdev -def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,nctx,ncty,icnode,ixnode,iynode,cnodev,cnodex,cnodey,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov,MAXCTX,MAXCTY,MAXKR1,MAXKR2): + +def ikrige(ix, iy, nx, ny, xx, yy, lktype, x, y, vr, sec, colocorr, gmean, lvm, close, covtab, nctx, ncty, icnode, ixnode, iynode, cnodev, cnodex, cnodey, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov, MAXCTX, MAXCTY, MAXKR1, MAXKR2): """GSLIB's KRIGE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python and modified for indicator kriging by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. 
WARNING: tested only for ktype 0,1,2 (2 is local proportion model / local mean provided, not residual approach) """ EPSLON = 1.0e-20 - cur_index = ix + (iy)*nx + cur_index = ix + (iy)*nx # print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) @@ -1198,9 +1277,14 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) - vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) - r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + vra = np.zeros(MAXKR1) + vrea = np.zeros(MAXKR1) + r = np.zeros(MAXKR1) + rr = np.zeros(MAXKR1) + s = np.zeros(MAXKR1) + a = np.zeros(MAXKR2) + cbb = cova2(0, 0, 0, 0, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print(r.shape) # Local mean # just pass the local probability as gmean # if lktype == 2: @@ -1210,89 +1294,95 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # Size of the kriging system: first = False - na = nclose + ncnode + na = nclose + ncnode # print('lktype' + str(lktype)) - if lktype == 0: neq = na + if lktype == 0: + neq = na if lktype == 1: -# print('ordinary kriging') + # print('ordinary kriging') + neq = na + 1 + if lktype == 2: + neq = na + if lktype == 3: + neq = na + 2 + if lktype == 4: neq = na + 1 - if lktype == 2: neq = na - if lktype == 3: neq = na + 2 - if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) # print('kriging data close'); print(close) # print('kriging node close'); print(icnode) # Set up kriging matrices: - iin=-1 # acocunting for 0 origin + iin = -1 # acocunting for 0 origin # print('krige na' + str(na)) - for j in range(0,na): + for j in range(0, na): -# Sort out the actual location of point "j" - if j < nclose: # adjusted for 0 index origin - index = int(close[j]) - x1 = x[index] - y1 = y[index] + # Sort out the actual location of point "j" + if j < nclose: # adjusted for 0 index origin + index = int(close[j]) + x1 = x[index] + y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) # if lvm.shape[0] > 1: # vrea[j]= sec[index]; # else: - vrea[j] = 0.0 # added this - no effect + vrea[j] = 0.0 # added this - no effect # if lktype == 2: vra[j] = vra[j] - vrea[j] # just using local variable mean not full residual approach else: -# It is a previously simulated node (keep index for table look-up): -# print(j) - index = j-(nclose) # adjust for 0 index - x1 = cnodex[index] - y1 = cnodey[index] + # It is a previously simulated node (keep index for table look-up): + # print(j) + index = j-(nclose) # adjust for 0 index + x1 = cnodex[index] + y1 = cnodey[index] vra[j] = cnodev[index] - ind = icnode[index] + ind = icnode[index] # print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - ix1 = ix + (int(ixnode[ind])-nctx-1) - iy1 = iy + (int(iynode[ind])-ncty-1) + ix1 = ix + (int(ixnode[ind])-nctx-1) + iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) - index = ix1 + (iy1-1)*nx + index = ix1 + (iy1-1)*nx # if lktype == 2: # vrea[j]= lvm[index] # vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix -# print('kriging indice populated' + str(j) + ',' + str(i)) -# 
Sort out the actual location of point "i" + for i in range(0, na): # we need the full matrix + # print('kriging indice populated' + str(j) + ',' + str(i)) + # Sort out the actual location of point "i" if i < nclose: - index = int(close[i]) # adjust for 0 index - x2 = x[index] - y2 = y[index] + index = int(close[i]) # adjust for 0 index + x2 = x[index] + y2 = y[index] else: -# It is a previously simulated node (keep index for table look-up): + # It is a previously simulated node (keep index for table look-up): #print('i = ' + str(i) + ',nclose = ' + str(nclose) + ', na = ' + str(na)) - index = i-(nclose) - x2 = cnodex[index] - y2 = cnodey[index] - ind = icnode[index] + index = i-(nclose) + x2 = cnodex[index] + y2 = cnodey[index] + ind = icnode[index] # print('previous node index' + str(ind)) - ix2 = ix + (int(ixnode[ind])-nctx-1) - iy2 = iy + (int(iynode[ind])-ncty-1) + ix2 = ix + (int(ixnode[ind])-nctx-1) + iy2 = iy + (int(iynode[ind])-ncty-1) # Now, get the covariance value: iin = iin + 1 # print('kriging data location = '); print(x2,y2) # Decide whether or not to use the covariance look-up table: if j <= nclose or i <= nclose: -# print('x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov') -# print(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print('x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov') + # print(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print('cov'); print(cov) a[iin] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix1 - ix2) -# jj = ncty + 1 + (iy1 - iy2) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix1 - ix2) + # jj = ncty + 1 + (iy1 - iy2) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: @@ -1302,8 +1392,9 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # Get the RHS value (possibly with covariance look-up table): if j <= nclose: -# print(cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print(cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) @@ -1311,19 +1402,20 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n r[j] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix - ix1) -# jj = ncty + 1 + (iy - iy1) - -# print('RHS ctable coord' + str(ii) + ',' + str(jj)) -# print('ix,iy ='); print(ix,iy) -# print('ix1,iy1'); print(ix1,iy1) -# if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 -# print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) -# else: -# cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix - ix1) + # jj = ncty + 1 + (iy - iy1) + + # print('RHS ctable coord' + str(ii) + 
',' + str(jj))
+            # print('ix,iy ='); print(ix,iy)
+            # print('ix1,iy1'); print(ix1,iy1)
+            # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0
+            #     print('Not using covariance table')
+            #     cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov)
+            # else:
+            #     cov = covtab[ii,jj]
+            cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc,
+                        aa, it, ang, anis, rotmat, maxcov)
 # if cov >= 1.0:
 #     print('cov of 1.0 RHS for node ' + str(j))
 #     print('ix,iy ='); print(xx,xx)
@@ -1332,54 +1424,59 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n
         r[j] = cov
 # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1))
         rr[j] = r[j]
-        if lktype == 1: # we need the full array
+        if lktype == 1:  # we need the full array
             iin = iin + 1
             a[iin] = 1.0
-        if lktype == 4: # we need the full array
+        if lktype == 4:  # we need the full array
             iin = iin + 1
             a[iin] = colocorr*r[j]

 # Addition of OK constraint:
     if lktype == 1 or lktype == 3:
-        for i in range(0,na):
-            iin = iin + 1
+        for i in range(0, na):
+            iin = iin + 1
             a[iin] = 1.0
-        iin = iin + 1
-        a[iin] = 0.0
-        r[na] = 1.0
+        iin = iin + 1
+        a[iin] = 0.0
+        r[na] = 1.0
         rr[na] = 1.0

 # Addition of the External Drift Constraint:
     if lktype == 3:
-        edmin = 999999.
         edmax = -999999.
-        for i in range(0,na):
-            iin = iin + 1
-            a[iin] = vrea(i)
-            if a[iin] < edmin: edmin = a[iin]
-            if a[iin] > edmax: edmax = a[iin]
-        iin = iin + 1
-        a[iin] = 0.0
-        iin = iin + 1
-        a[iin] = 0.0
-        ind = ix + (iy-1)*nx
-        r[na+1] = lvm[ind]
+        edmin = 999999.
+        for i in range(0, na):
+            iin = iin + 1
+            a[iin] = vrea[i]  # index with brackets: vrea is an ndarray, not a callable
+            if a[iin] < edmin:
+                edmin = a[iin]
+            if a[iin] > edmax:
+                edmax = a[iin]
+        iin = iin + 1
+        a[iin] = 0.0
+        iin = iin + 1
+        a[iin] = 0.0
+        ind = ix + (iy-1)*nx
+        r[na+1] = lvm[ind]
         rr[na+1] = r[na+1]
-        if (edmax-edmin) < EPSLON: neq = neq - 1
+        if (edmax-edmin) < EPSLON:
+            neq = neq - 1

 # Addition of Collocated Cosimulation Constraint:
     if lktype == 4:
         colc = True
-        sfmin = 1.0e21
         sfmax = -1.0e21
-        for i in range(0,na):
-            iin = iin + 1
+        sfmin = 1.0e21
+        for i in range(0, na):
+            iin = iin + 1
             a[iin] = colocorr*r[i]
-            if a[iin] < sfmin: sfmin = a[iin]
-            if a[iin] > sfmax: sfmax = a[iin]
-        iin = iin + 1
+            if a[iin] < sfmin:
+                sfmin = a[iin]
+            if a[iin] > sfmax:
+                sfmax = a[iin]
+        iin = iin + 1
         a[iin] = 1.0
-        ii = na
-        r[ii] = colocorr
+        ii = na
+        r[ii] = colocorr
         rr[ii] = r[ii]
 # if (sfmax-sfmin) < EPSLON:
 #     neq = neq - 1
@@ -1391,18 +1488,18 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n
 # print('r'); print(r)
 # print('data'); print(vra)
     if neq == 1 and lktype != 3:
-#        print('neq = 1 '); print(a,r)
-        s[0] = r[0] / a[0]
+        # print('neq = 1 '); print(a,r)
+        s[0] = r[0] / a[0]
     else:
-#        print('neq prior ksol' + str(neq))
-        s = ksol_numpy(neq,a,r)
+        # print('neq prior ksol' + str(neq))
+        s = ksol_numpy(neq, a, r)
 # print('neq post ksol' + str(neq))
 # if s.shape[0]< neq:
 #     print('s shape'); print(s.shape)
 #     print('a'); print(a)
 #     print('r'); print(r)
-    ising = 0 # need to figure this out
+    ising = 0  # need to figure this out
 # print('s'); print(s)

 # Compute the estimate and kriging variance.
Recall that kriging type @@ -1413,43 +1510,45 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # 4 = Collocated Cosimulation: # print('kriging weights'); print(s) - cmean = 0.0 + cmean = 0.0 # print('cbb = ' + str(cbb)) cstdev = cbb sumwts = 0.0 - for i in range(0,na): - cmean = cmean + s[i]*vra[i] + for i in range(0, na): + cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) # if lktype == 2: cmean = cmean + gmean - if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion - ind = ix + (iy-1)*nx + if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion + ind = ix + (iy-1)*nx # print(ind) # print('neq'); print(neq) # print('s'); print(s.shape) # print('lvm'); print(lvm.shape) # print('colc wt = ' + str(s[na]) + ' for ' + str(lvm[cur_index]) + ' at index ' + str(cur_index)) - cmean = cmean + s[na]*lvm[cur_index] - cstdev = cstdev - s[na] *rr[na] + cmean = cmean + s[na]*lvm[cur_index] + cstdev = cstdev - s[na] * rr[na] if lktype == 0 or lktype == 2: cmean = cmean + (1.0-sumwts)*gmean # print('cmean'); print(cmean) # Error message if negative variance: if cstdev < 0.0: -# print('ERROR: Negative Variance: ' + str(cstdev)) + # print('ERROR: Negative Variance: ' + str(cstdev)) cstdev = 0.0 - cstdev = math.sqrt(max(cstdev,0.0)) + cstdev = math.sqrt(max(cstdev, 0.0)) # print('kriging estimate and variance' + str(cmean) + ', ' + str(cstdev)) return cmean, cstdev -def getindex(nc,cmn,csiz,loc): + +def getindex(nc, cmn, csiz, loc): ic = min(int((loc - cmn) / csiz), nc - 1) return ic + def correct_trend(trend): """Correct a indicator based trend model for closure (probabilities sum to 1.0). :param trend: ndarray [ny,nx,ncut] @@ -1458,17 +1557,18 @@ def correct_trend(trend): ny = trend.shape[0] nx = trend.shape[1] ncut = trend.shape[2] - for iy in range(0,ny): - for ix in range(0,nx): + for iy in range(0, ny): + for ix in range(0, nx): sum = 0.0 - for ic in range(0,ncut): - sum = sum + trend[iy,ix,ic] + for ic in range(0, ncut): + sum = sum + trend[iy, ix, ic] if sum > 0.0: - for icut in range(0,ncut): - trend[iy,ix,ic] = trend[iy,ix,ic] / sum + for icut in range(0, ncut): + trend[iy, ix, ic] = trend[iy, ix, ic] / sum return trend -def ordrel(ivtype,ncut,ccdf): + +def ordrel(ivtype, ncut, ccdf): """Correct a indicator based CDF for order relations. :param ivtype: variable type, 0 - categorical and 1 - continuous :param ncut: number of categories or thresholds @@ -1478,10 +1578,10 @@ def ordrel(ivtype,ncut,ccdf): # print('input ordering relations'); print(ccdf) ccdfo = np.zeros(ncut) ccdf1 = np.zeros(ncut) - ccdf2 = np.zeros(ncut) # do we need MAXCUT = 100 for these 2? + ccdf2 = np.zeros(ncut) # do we need MAXCUT = 100 for these 2? 
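A note on correct_trend a few lines up: its inner normalization loop iterates over icut but indexes trend[iy, ix, ic], and ic is left at ncut-1 by the preceding summation loop, so only the last category gets rescaled (ncut times over) and closure is not actually enforced; the local name sum also shadows the builtin. A minimal corrected sketch of the intended normalization (correct_trend_fixed is a hypothetical name, not part of this patch):

    import numpy as np

    def correct_trend_fixed(trend):
        # trend: ndarray [ny, nx, ncut]; rescale each cell so its
        # category probabilities sum to 1.0, leaving all-zero cells alone.
        totals = trend.sum(axis=2, keepdims=True)
        np.divide(trend, totals, out=trend, where=totals > 0.0)
        return trend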
# Make sure conditional cdf is within [0,1]: - for i in range(0,ncut): + for i in range(0, ncut): if ccdf[i] < 0.0: ccdf1[i] = 0.0 ccdf2[i] = 0.0 @@ -1496,22 +1596,26 @@ def ordrel(ivtype,ncut,ccdf): # Correct sequentially up, then down, and then average: if ivtype == 0: sumcdf = 0.0 - for i in range(0,ncut): + for i in range(0, ncut): sumcdf = sumcdf + ccdf1[i] - if sumcdf <= 0.0: sumcdf = 1.0 - for i in range(0,ncut): + if sumcdf <= 0.0: + sumcdf = 1.0 + for i in range(0, ncut): ccdfo[i] = ccdf1[i] / sumcdf else: - for i in range(1,ncut): - if ccdf1[i] < ccdf1[i-1]: ccdf1[i] = ccdf1[i-1] - for i in range(ncut-2,0,-1): - if ccdf2[i] > ccdf2[i+1]: ccdf2[i] = ccdf2[i+1] - for i in range(0,ncut): + for i in range(1, ncut): + if ccdf1[i] < ccdf1[i-1]: + ccdf1[i] = ccdf1[i-1] + for i in range(ncut-2, 0, -1): + if ccdf2[i] > ccdf2[i+1]: + ccdf2[i] = ccdf2[i+1] + for i in range(0, ncut): ccdfo[i] = 0.5*(ccdf1[i]+ccdf2[i]) # Return with corrected CDF: return ccdfo + def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): """GSLIB's DECLUS program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at @@ -1742,21 +1846,7 @@ def gam(array, tmin, tmax, xsiz, ysiz, ixd, iyd, nlag, isill): return lag, vario, npp -def gamv( - df, - xcol, - ycol, - vcol, - tmin, - tmax, - xlag, - xltol, - nlag, - azm, - atol, - bandwh, - isill, -): +def gamv(df, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill): """GSLIB's GAMV program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (Jan, 2019). @@ -1785,6 +1875,13 @@ def gamv( y = df_extract[ycol].values vr = df_extract[vcol].values + return _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, + isill) + + +@jit(**JITKW, parallel=True) +def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill): + # Summary statistics for the data after trimming avg = vr.mean() # TODO: not used stdev = vr.std() @@ -1803,7 +1900,7 @@ def gamv( ) # Standardize sill to one by dividing all variogram values by the variance - for il in range(0, nlag + 2): + for il in nb.prange(0, nlag + 2): if isill == 1: vario[il] = vario[il] / sills @@ -1813,7 +1910,7 @@ def gamv( return dis, vario, npp -@jit(nopython=True) +@jit(**JITKW, parallel=True) def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): """Calculate the variogram by looping over combinatorial of data pairs. :param x: x values @@ -1862,8 +1959,8 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): dismxs = ((float(nlag) + 0.5 - EPSLON) * xlag) ** 2 # Main loop over all pairs - for i in range(0, nd): - for j in range(0, nd): + for i in nb.prange(0, nd): + for j in nb.prange(0, nd): # Definition of the lag corresponding to the current pair dx = x[j] - x[i] @@ -1970,7 +2067,8 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): return dis, vario, npp -def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): + +def varmapv(df, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, isill): """Calculate the variogram map from irregularly spaced data. 
:param df: DataFrame with the spatial data, xcol, ycol, vcol coordinates and property columns :param xcol: DataFrame column with x coordinate @@ -1987,12 +2085,35 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): :return: TODO """ # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values + return _varmapv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, + isill) + + +@jit(**JITKW, parallel=False) # runs faster not in parallel +def _varmapv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, isill): + """Calculate the variogram map from irregularly spaced data. + :param df: DataFrame with the spatial data, xcol, ycol, vcol coordinates and property columns + :param xcol: DataFrame column with x coordinate + :param ycol: DataFrame column with y coordinate + :param vcol: DataFrame column with value of interest + :param tmin: lower trimming limit + :param tmax: upper trimming limit + :param nxlag: number of lags in the x direction + :param nxlag: number of lags in the y direction + :param dxlag: size of the lags in the x direction + :param dylag: size of the lags in the y direction + :param minnp: minimum number of pairs to calculate a variogram value + :param isill: standardize sill to be 1.0 + :return: TODO + """ + # Summary statistics for the data after trimming avg = vr.mean() stdev = vr.std() @@ -2002,63 +2123,65 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): vrmax = vr.max() # Initialize the summation arrays - npp = np.zeros((nylag*2+1,nxlag*2+1)) - gam = np.zeros((nylag*2+1,nxlag*2+1)) - nppf = np.zeros((nylag*2+1,nxlag*2+1)) - gamf = np.zeros((nylag*2+1,nxlag*2+1)) - hm = np.zeros((nylag*2+1,nxlag*2+1)) - tm = np.zeros((nylag*2+1,nxlag*2+1)) - hv = np.zeros((nylag*2+1,nxlag*2+1)) - tv = np.zeros((nylag*2+1,nxlag*2+1)) + npp = np.zeros((nylag*2+1, nxlag*2+1)) + gam = np.zeros((nylag*2+1, nxlag*2+1)) + nppf = np.zeros((nylag*2+1, nxlag*2+1)) + gamf = np.zeros((nylag*2+1, nxlag*2+1)) + hm = np.zeros((nylag*2+1, nxlag*2+1)) + tm = np.zeros((nylag*2+1, nxlag*2+1)) + hv = np.zeros((nylag*2+1, nxlag*2+1)) + tv = np.zeros((nylag*2+1, nxlag*2+1)) # First fix the location of a seed point: - for i in range(0,nd): + for i in nb.prange(0, nd): # Second loop over the data: - for j in range(0,nd): + for j in nb.prange(0, nd): # The lag: ydis = y[j] - y[i] iyl = nylag + int(ydis/dylag) - if iyl < 0 or iyl > nylag*2: # acocunting for 0,...,n-1 array indexing + if iyl < 0 or iyl > nylag*2: # acocunting for 0,...,n-1 array indexing continue xdis = x[j] - x[i] ixl = nxlag + int(xdis/dxlag) - if ixl < 0 or ixl > nxlag*2: # acocunting for 0,...,n-1 array indexing + if ixl < 0 or ixl > nxlag*2: # acocunting for 0,...,n-1 array indexing continue # We have an acceptable pair, therefore accumulate all the statistics # that are required for the variogram: - npp[iyl,ixl] = npp[iyl,ixl] + 1 # our ndarrays read from the base to top, so we flip - tm[iyl,ixl] = tm[iyl,ixl] + vr[i] - hm[iyl,ixl] = hm[iyl,ixl] + vr[j] - tv[iyl,ixl] = tm[iyl,ixl] + vr[i]*vr[i] - hv[iyl,ixl] = hm[iyl,ixl] + vr[j]*vr[j] - gam[iyl,ixl] = gam[iyl,ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j])) + # our ndarrays read from the base to top, so we flip + npp[iyl, ixl] = npp[iyl, 
ixl] + 1 + tm[iyl, ixl] = tm[iyl, ixl] + vr[i] + hm[iyl, ixl] = hm[iyl, ixl] + vr[j] + tv[iyl, ixl] = tm[iyl, ixl] + vr[i]*vr[i] + hv[iyl, ixl] = hm[iyl, ixl] + vr[j]*vr[j] + gam[iyl, ixl] = gam[iyl, ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j])) # Get average values for gam, hm, tm, hv, and tv, then compute # the correct "variogram" measure: - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): - if npp[iy,ix] <= minnp: - gam[iy,ix] = -999. - hm[iy,ix] = -999. - tm[iy,ix] = -999. - hv[iy,ix] = -999. - tv[iy,ix] = -999. + for iy in nb.prange(0, nylag*2+1): + for ix in nb.prange(0, nxlag*2+1): + if npp[iy, ix] <= minnp: + gam[iy, ix] = -999. + hm[iy, ix] = -999. + tm[iy, ix] = -999. + hv[iy, ix] = -999. + tv[iy, ix] = -999. else: - rnum = npp[iy,ix] - gam[iy,ix] = gam[iy,ix] / (2*rnum) # semivariogram - hm[iy,ix] = hm[iy,ix] / rnum - tm[iy,ix] = tm[iy,ix] / rnum - hv[iy,ix] = hv[iy,ix] / rnum - hm[iy,ix]*hm[iy,ix] - tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix] + rnum = npp[iy, ix] + gam[iy, ix] = gam[iy, ix] / (2*rnum) # semivariogram + hm[iy, ix] = hm[iy, ix] / rnum + tm[iy, ix] = tm[iy, ix] / rnum + hv[iy, ix] = hv[iy, ix] / rnum - hm[iy, ix]*hm[iy, ix] + tv[iy, ix] = tv[iy, ix] / rnum - tm[iy, ix]*tm[iy, ix] # Attempt to standardize: if isill > 0: - gamf[iy,ix] = gamf[iy,ix]/sills - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): - gamf[iy,ix] = gam[nylag*2-iy,ix] - nppf[iy,ix] = npp[nylag*2-iy,ix] + gamf[iy, ix] = gamf[iy, ix]/sills + for iy in nb.prange(0, nylag*2+1): + for ix in nb.prange(0, nxlag*2+1): + gamf[iy, ix] = gam[nylag*2-iy, ix] + nppf[iy, ix] = npp[nylag*2-iy, ix] return gamf, nppf + def vmodel( nlag, xlag, @@ -2076,11 +2199,11 @@ def vmodel( """ # Parameters - MAXNST=4 - DEG2RAD=3.14159265/180.0 - MAXROT=MAXNST+1 + MAXNST = 4 + DEG2RAD = 3.14159265/180.0 + MAXROT = MAXNST+1 EPSLON = 1.0e-20 - VERSION= 1.01 + VERSION = 1.01 # Declare arrays index = np.zeros(nlag+1) @@ -2115,19 +2238,21 @@ def vmodel( print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff)) rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9) - - xx = 0.0; yy = 0.0 - for il in range(0,nlag+1): + xx = 0.0 + yy = 0.0 + for il in range(0, nlag+1): index[il] = il - cov[il] = cova2(0.0,0.0,xx,yy,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov[il] = cova2(0.0, 0.0, xx, yy, nst, c0, 9999.9, + cc, aa, it, ang, anis, rotmat, maxcov) gam[il] = maxcov - cov[il] - ro[il] = cov[il]/maxcov - h[il] = math.sqrt(max((xx*xx+yy*yy),0.0)) + ro[il] = cov[il]/maxcov + h[il] = math.sqrt(max((xx*xx+yy*yy), 0.0)) xx = xx + xoff yy = yy + yoff # finished - return index,h,gam,cov,ro + return index, h, gam, cov, ro + def nscore( df, vcol, wcol=None, ismooth=False, dfsmooth=None, smcol=0, smwcol=0 @@ -2205,6 +2330,7 @@ def nscore( _dists = [] _weights = [] + def kb2d( df, xcol, @@ -2266,15 +2392,24 @@ def kb2d( # load the variogram nst = vario['nst'] - cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) - ang = np.zeros(nst); anis = np.zeros(nst) + cc = np.zeros(nst) + aa = np.zeros(nst) + it = np.zeros(nst) + ang = np.zeros(nst) + anis = np.zeros(nst) - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; - aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + c0 = vario['nug'] + cc[0] = vario['cc1'] + it[0] = vario['it1'] + ang[0] = vario['azi1'] + aa[0] = vario['hmaj1'] + anis[0] = vario['hmin1']/vario['hmaj1'] if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; - aa[1] = vario['hmaj2']; anis[1] = 
vario['hmin2']/vario['hmaj2']; + cc[1] = vario['cc2'] + it[1] = vario['it2'] + ang[1] = vario['azi2'] + aa[1] = vario['hmaj2'] + anis[1] = vario['hmin2']/vario['hmaj2'] # Allocate the needed memory: xdb = np.zeros(MAXDIS) @@ -2288,21 +2423,23 @@ def kb2d( rr = np.zeros(MAXKD) s = np.zeros(MAXKD) a = np.zeros(MAXKRG) - kmap = np.zeros((nx,ny)) - vmap = np.zeros((nx,ny)) + kmap = np.zeros((nx, ny)) + vmap = np.zeros((nx, ny)) # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) - ndmax = min(ndmax,nd) + ndmax = min(ndmax, nd) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, nd)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics for the data after trimming avg = vr.mean() @@ -2321,40 +2458,42 @@ def kb2d( # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): - ndb = nxdis * nydis + ndb = nxdis * nydis if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') return kmap - xdis = xsiz / max(float(nxdis),1.0) - ydis = ysiz / max(float(nydis),1.0) + xdis = xsiz / max(float(nxdis), 1.0) + ydis = ysiz / max(float(nydis), 1.0) xloc = -0.5*(xsiz+xdis) - i = -1 # accounting for 0 as lowest index - for ix in range(0,nxdis): + i = -1 # accounting for 0 as lowest index + for ix in range(0, nxdis): xloc = xloc + xdis yloc = -0.5*(ysiz+ydis) - for iy in range(0,nydis): + for iy in range(0, nydis): yloc = yloc + ydis i = i+1 xdb[i] = xloc ydb[i] = yloc # Initialize accumulators: - cbb = 0.0 + cbb = 0.0 rad2 = radius*radius # Calculate Block Covariance. Check for point kriging. 
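The step below computes GSLIB's block covariance C-bar(V,V): the covariance averaged over all pairs of block-discretization points, with the nugget c0 removed from the i == j pairs, falling back to the point covariance when ndb <= 1. A self-contained sketch of that computation, assuming a caller-supplied point-covariance callable cov_fn standing in for the cova2 call used here (note GSLIB divides by ndb*ndb; the port leaves that normalization commented out with a TODO further down, so cbb stays a sum when ndb > 1):

    def block_covariance(xdb, ydb, cov_fn, c0):
        # Average covariance between all pairs of discretization points;
        # drop the nugget on the diagonal so it does not enter C(V,V).
        ndb = len(xdb)
        cbb = 0.0
        for i in range(ndb):
            for j in range(ndb):
                cov = cov_fn(xdb[i], ydb[i], xdb[j], ydb[j])
                if i == j:
                    cov = cov - c0
                cbb = cbb + cov
        return cbb / (ndb * ndb)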
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(xdb[0],ydb[0],xdb[0],ydb[0],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX) + cov = cova2(xdb[0], ydb[0], xdb[0], ydb[0], nst, c0, + PMX, cc, aa, it, ang, anis, rotmat, maxcov) # Keep this value to use for the unbiasedness constraint: unbias = cov - first = False + first = False if ndb <= 1: cbb = cov else: - for i in range(0,ndb): - for j in range(0,ndb): - cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + for i in range(0, ndb): + for j in range(0, ndb): + cov = cova2(xdb[i], ydb[i], xdb[j], ydb[j], nst, + c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) if i == j: cov = cov - c0 cbb = cbb + cov @@ -2364,51 +2503,57 @@ def kb2d( nk = 0 ak = 0.0 vk = 0.0 - for iy in range(0,ny): + for iy in range(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in range(0, nx): xloc = xmn + (ix-0)*xsiz - current_node = (yloc,xloc) + current_node = (yloc, xloc) # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search + # use kd tree for fast nearest data search + dist, nums = tree.query( + current_node, ndmax, distance_upper_bound=radius) # remove any data outside search radius na = len(dist) - nums = nums[dist UNEST: nk = nk + 1 ak = ak + est @@ -2520,18 +2670,19 @@ def kb2d( return kmap, vmap + _nums = None _dists = None _weights = None -@jit(nopython=True) -def kb2d_jit_main( - tree, nd, x, y, vr, - xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, - ndmax, radius, ktype, skmean, vario_int, vario_float): +@jit(**JITKW, parallel=False) # numba crashed on parallel=True +def _kb2d_jit( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, radius, ktype, skmean, vario_int, vario_float): -# Constants + # Constants UNEST = -999. EPSLON = 1.0e-10 VERSION = 2.907 @@ -2542,7 +2693,7 @@ def kb2d_jit_main( MAXKD = MAXSAM + 1 MAXKRG = MAXKD * MAXKD - ndmax = min(ndmax,nd) + ndmax = min(ndmax, nd) # load the variogram nst = vario_int['nst'] @@ -2564,9 +2715,8 @@ def kb2d_jit_main( rr = np.zeros(MAXKD) s = np.zeros(MAXKD) a = np.zeros(MAXKRG) - kmap = np.zeros((nx,ny)) - vmap = np.zeros((nx,ny)) - + kmap = np.zeros((nx, ny)) + vmap = np.zeros((nx, ny)) c0 = vario_float['nug'] cc[0] = vario_float['cc1'] @@ -2593,7 +2743,7 @@ def kb2d_jit_main( # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): - ndb = nxdis * nydis + ndb = nxdis * nydis if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') @@ -2601,7 +2751,7 @@ def kb2d_jit_main( xdis = xsiz / max(float(nxdis), 1.0) ydis = ysiz / max(float(nydis), 1.0) xloc = -0.5 * (xsiz + xdis) - i = -1 # accounting for 0 as lowest index + i = -1 # accounting for 0 as lowest index for ix in range(0, nxdis): xloc = xloc + xdis yloc = -0.5 * (ysiz+ydis) @@ -2612,24 +2762,26 @@ def kb2d_jit_main( ydb[i] = yloc # Initialize accumulators: - cbb = 0.0 + cbb = 0.0 rad2 = radius * radius # Calculate Block Covariance. Check for point kriging. 
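Because scipy's cKDTree cannot be called inside @jit(nopython=True) code, the kb2d_jit wrapper further down runs every grid node's neighbour query up front and passes _kb2d_jit a numba.typed.Dict keyed by the (y, x) node tuple, so the jitted loop only performs typed-dictionary lookups. A minimal sketch of that pattern with made-up coordinates (the explicit int32 cast is an assumption to match the i4[:] value type declared below, since cKDTree.query returns platform-sized integer indices):

    import numpy as np
    import numba as nb
    import scipy.spatial as sp
    from numba.typed import Dict

    y = np.array([0.0, 1.0, 2.0])
    x = np.array([0.0, 1.0, 2.0])
    _tree = sp.cKDTree(np.column_stack((y, x)))

    tree = Dict.empty(
        key_type=nb.types.Tuple((nb.f8, nb.f8)),
        value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])),
    )
    node = (0.5, 0.5)
    dist, nums = _tree.query(node, k=2, distance_upper_bound=5.0)
    tree[node] = (dist, nums.astype(np.int32))  # the jitted loop reads tree[node]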
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(xdb[0],ydb[0],xdb[0],ydb[0],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX) + cov = cova2(xdb[0], ydb[0], xdb[0], ydb[0], nst, c0, + PMX, cc, aa, it, ang, anis, rotmat, maxcov) # Keep this value to use for the unbiasedness constraint: unbias = cov - first = False + first = False if ndb <= 1: cbb = cov else: - for i in range(0, ndb): - for j in range(0, ndb): - cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + for i in nb.prange(0, ndb): + for j in nb.prange(0, ndb): + cov = cova2(xdb[i], ydb[i], xdb[j], ydb[j], nst, + c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) if i == j: cov = cov - c0 - cbb = cbb + cov + cbb += cov # print(real) # cbb = cbb/real(ndb*ndb) # TODO: undefined? @@ -2638,9 +2790,9 @@ def kb2d_jit_main( ak = 0.0 vk = 0.0 - for iy in range(0,ny): + for iy in nb.prange(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in nb.prange(0, nx): xloc = xmn + (ix-0)*xsiz current_node = (yloc, xloc) @@ -2653,38 +2805,42 @@ def kb2d_jit_main( dist, nums = tree[current_node] # remove any data outside search radius na = len(dist) - nums = nums[dist UNEST: nk = nk + 1 ak = ak + est @@ -2837,7 +2997,8 @@ def kb2d_jit( :return: """ # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) x = df_extract[xcol].values y = df_extract[ycol].values @@ -2845,20 +3006,23 @@ def kb2d_jit( # Make a KDTree for fast search of nearest neighbours # dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) + data_locs = np.column_stack((y, x)) import scipy.spatial as sp - _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) vario_int = Dict.empty( - key_type = nb.types.unicode_type, - value_type = nb.types.i8 + key_type=nb.types.unicode_type, + value_type=nb.types.i8 ) vario_float = Dict.empty( - key_type = nb.types.unicode_type, - value_type = nb.f8 + key_type=nb.types.unicode_type, + value_type=nb.f8 ) + vario = copy(vario) + vario_float['nug'] = vario.pop('nug') vario_float['cc1'] = vario.pop('cc1') vario_float['cc2'] = vario.pop('cc2') @@ -2867,18 +3031,19 @@ def kb2d_jit( vario_int[key] = vario[key] tree = Dict.empty( - key_type = nb.types.Tuple((nb.f8, nb.f8)), - value_type = nb.types.Tuple((nb.f8[:], nb.i4[:])) + key_type=nb.types.Tuple((nb.f8, nb.f8)), + value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])) ) - for iy in range(0,ny): + for iy in range(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in range(0, nx): xloc = xmn + (ix-0)*xsiz - current_node = (yloc,xloc) - tree[current_node] = _tree.query(current_node, ndmax, distance_upper_bound=radius) + current_node = (yloc, xloc) + tree[current_node] = _tree.query( + current_node, ndmax, distance_upper_bound=radius) - kmap, vmap = kb2d_jit_main( + kmap, vmap = _kb2d_jit( tree, nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, ndmax, radius, ktype, skmean, vario_int, vario_float) @@ -2886,8 +3051,7 @@ def kb2d_jit( return kmap, vmap -def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xmn,xsiz,ny,ymn,ysiz,ndmin,ndmax,radius,ktype,vario): - +def 
ik2d(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, ndmin, ndmax, radius, ktype, vario): """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). @@ -2925,30 +3089,42 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm MAXEQ = MAXSAM + 1 mik = 0 # full indicator kriging use_trend = False - if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: use_trend = True + if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: + use_trend = True # load the variogram MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) - aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) - ang = np.zeros((MAXNST,ncut)); anis = np.zeros((MAXNST,ncut)) - - for icut in range(0,ncut): + nst = np.zeros(ncut, dtype=int) + c0 = np.zeros(ncut) + cc = np.zeros((MAXNST, ncut)) + aa = np.zeros((MAXNST, ncut), dtype=int) + it = np.zeros((MAXNST, ncut), dtype=int) + ang = np.zeros((MAXNST, ncut)) + anis = np.zeros((MAXNST, ncut)) + + for icut in range(0, ncut): nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; - ang[0,icut] = vario[icut]['azi1']; - aa[0,icut] = vario[icut]['hmaj1']; anis[0,icut] = vario[icut]['hmin1']/vario[icut]['hmaj1']; + c0[icut] = vario[icut]['nug'] + cc[0, icut] = vario[icut]['cc1'] + it[0, icut] = vario[icut]['it1'] + ang[0, icut] = vario[icut]['azi1'] + aa[0, icut] = vario[icut]['hmaj1'] + anis[0, icut] = vario[icut]['hmin1']/vario[icut]['hmaj1'] if nst[icut] == 2: - cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; - aa[1,icut] = vario[icut]['hmaj2']; anis[1,icut] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + cc[1, icut] = vario[icut]['cc2'] + it[1, icut] = vario[icut]['it2'] + ang[1, icut] = vario[icut]['azi2'] + aa[1, icut] = vario[icut]['hmaj2'] + anis[1, icut] = vario[icut]['hmin2']/vario[icut]['hmaj2'] # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] MAXDAT = len(df_extract) MAXCUT = ncut MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) + MAXROT = MAXNST*MAXCUT + 1 + ikout = np.zeros((nx, ny, ncut)) maxcov = np.zeros(ncut) # Allocate the needed memory: @@ -2961,8 +3137,8 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm rr = np.zeros(MAXEQ) s = np.zeros(MAXEQ) a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) + ikmap = np.zeros((nx, ny, ncut)) + vr = np.zeros((MAXDAT, MAXCUT+1)) nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) @@ -2970,7 +3146,7 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) + ikout = np.zeros((nx, ny, ncut)) x = df_extract[xcol].values y = df_extract[ycol].values @@ -2980,31 +3156,33 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm # data value. 
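The indicator coding built below is the standard one: for a continuous variable, indicator icut is 1 where the datum does not exceed thresh[icut]; for a categorical variable it is 1 where the datum falls on the category (within 0.5). A toy sketch with float indicators; the np.where(..., '1', '0') form below produces strings and leans on numpy converting them during assignment into the float vr array, which plain 1.0/0.0 avoids:

    import numpy as np

    v = np.array([0.4, 1.7, 2.2])                  # data values (continuous case)
    thresh = np.array([1.0, 2.0])                  # thresholds
    vr = np.zeros((len(v), len(thresh) + 1))
    for icut, t in enumerate(thresh):
        vr[:, icut] = np.where(v <= t, 1.0, 0.0)   # indicator for threshold icut
    vr[:, len(thresh)] = v                         # raw values kept in the last column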
if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + vr[:, icut] = np.where( + (v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v + for icut in range(0, ncut): + vr[:, icut] = np.where(v <= thresh[icut], '1', '0') + vr[:, ncut] = v # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, MAXDAT)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics of the input data - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() + avg = vr[:, ncut].mean() + stdev = vr[:, ncut].std() ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() + vrmin = vr[:, ncut].min() + vrmax = vr[:, ncut].max() print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - actloc = np.zeros(MAXDAT, dtype = int) - for i in range(1,MAXDAT): + actloc = np.zeros(MAXDAT, dtype=int) + for i in range(1, MAXDAT): actloc[i] = i # Set up the rotation/anisotropy matrices that are needed for the @@ -3013,38 +3191,40 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[:,ic],cc[:,ic],ang[:,ic],9999.9) + for ic in range(0, ncut): + rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic], int( + nst[ic]), it[:, ic], cc[:, ic], ang[:, ic], 9999.9) rotmat.append(rotmat_temp) # Initialize accumulators: # not setup yet nk = 0 xk = 0.0 vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 + for icut in range(0, ncut): + nviol[icut] = 0 + aviol[icut] = 0.0 xviol[icut] = -1.0 nxy = nx*ny print('Working on the kriging') # Report on progress from time to time: if koption == 0: - nxy = nx*ny + nxy = nx*ny nloop = nxy - irepo = max(1,min((nxy/10),10000)) + irepo = max(1, min((nxy/10), 10000)) else: nloop = 10000000 - irepo = max(1,min((nd/10),10000)) + irepo = max(1, min((nd/10), 10000)) ddh = 0.0 # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: - for index in range(0,nloop): + for index in range(0, nloop): - if (int(index/irepo)*irepo) == index: print(' currently on estimate ' + str(index)) + if (int(index/irepo)*irepo) == index: + print(' currently on estimate ' + str(index)) if koption == 0: - iy = int((index)/nx) - ix = index - (iy)*nx + iy = int((index)/nx) + ix = index - (iy)*nx xloc = xmn + (ix)*xsiz yloc = ymn + (iy)*ysiz else: @@ -3055,92 +3235,98 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - current_node = (yloc,xloc) - dist, close = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + current_node = (yloc, xloc) + # use kd tree for fast nearest data search + dist, close = tree.query(current_node, ndmax) # remove any data outside search radius - close = close[dist= 1: krig = False + if mik == 1 and ic >= 1: + krig = False # Identify the close data (there may be a 
different number of data at # each threshold because of constraint intervals); however, if # there are no constraint intervals then this step can be avoided. nca = -1 - for ia in range(0,nclose): - j = int(close[ia]+0.5) + for ia in range(0, nclose): + j = int(close[ia]+0.5) ii = actloc[j] accept = True - if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)).lt.EPSLON: accept = False + if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)).lt.EPSLON: + accept = False if accept: nca = nca + 1 - vra[nca] = vr[ii,ic] - xa[nca] = x[j] - ya[nca] = y[j] + vra[nca] = vr[ii, ic] + xa[nca] = x[j] + ya[nca] = y[j] # If there are no samples at this threshold then use the global cdf: if nca == -1: if use_trend: - ccdf[ic] = trend[ny-iy-1,ix,ic] + ccdf[ic] = trend[ny-iy-1, ix, ic] else: ccdf[ic] = gcdf[ic] else: -# Now, only load the variogram, build the matrix,... if kriging: + # Now, only load the variogram, build the matrix,... if kriging: neq = nclose + ktype na = nclose # Set up kriging matrices: - iin=-1 # accounting for first index of 0 - for j in range(0,na): -# Establish Left Hand Side Covariance Matrix: - for i in range(0,na): # was j - want full matrix + iin = -1 # accounting for first index of 0 + for j in range(0, na): + # Establish Left Hand Side Covariance Matrix: + for i in range(0, na): # was j - want full matrix iin = iin + 1 - a[iin] = cova2(xa[i],ya[i],xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic]) + a[iin] = cova2(xa[i], ya[i], xa[j], ya[j], nst[ic], c0[ic], PMX, cc[:, ic], + aa[:, ic], it[:, ic], ang[:, ic], anis[:, ic], rotmat[ic], maxcov[ic]) if ktype == 1: iin = iin + 1 a[iin] = maxcov[ic] - r[j] = cova2(xloc,yloc,xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic]) + r[j] = cova2(xloc, yloc, xa[j], ya[j], nst[ic], c0[ic], PMX, cc[:, ic], + aa[:, ic], it[:, ic], ang[:, ic], anis[:, ic], rotmat[ic], maxcov[ic]) # Set the unbiasedness constraint: if ktype == 1: - for i in range(0,na): + for i in range(0, na): iin = iin + 1 a[iin] = maxcov[ic] - iin = iin + 1 - a[iin] = 0.0 - r[neq-1] = maxcov[ic] + iin = iin + 1 + a[iin] = 0.0 + r[neq-1] = maxcov[ic] rr[neq-1] = r[neq] # Solve the system: if neq == 1: ising = 0.0 - s[0] = r[0] / a[0] + s[0] = r[0] / a[0] else: - s = ksol_numpy(neq,a,r) + s = ksol_numpy(neq, a, r) # Finished kriging (if it was necessary): # Compute Kriged estimate of cumulative probability: - sumwts = 0.0 + sumwts = 0.0 ccdf[ic] = 0.0 - for i in range(0,nclose): + for i in range(0, nclose): ccdf[ic] = ccdf[ic] + vra[i]*s[i] - sumwts = sumwts + s[i] + sumwts = sumwts + s[i] if ktype == 0: if use_trend == True: - ccdf[ic] = ccdf[ic] + (1.0-sumwts)*trend[ny-iy-1,ix,ic] + ccdf[ic] = ccdf[ic] + \ + (1.0-sumwts)*trend[ny-iy-1, ix, ic] else: ccdf[ic] = ccdf[ic] + (1.0-sumwts)*gcdf[ic] @@ -3148,65 +3334,85 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm # Correct and write the distribution to the output file: nk = nk + 1 - ccdfo = ordrel(ivtype,ncut,ccdf) + ccdfo = ordrel(ivtype, ncut, ccdf) # Write the IK CCDF for this grid node: if koption == 0: - ikout[ny-iy-1,ix,:] = ccdfo + ikout[ny-iy-1, ix, :] = ccdfo else: - print('TBD') + print('TBD') return ikout -def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtcol,zmin,zmax,ltail,ltpar,utail,utpar,nsim, - nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,ndmax,nodmax,mults,nmult,noct,radius,radius1,sang1, - mxctx,mxcty,ktype,colocorr,sec_map,vario): -# Parameters from 
sgsim.inc - MAXNST=2; MAXROT=2; UNEST=-99.0; EPSLON=1.0e-20; VERSION=2.907 - KORDEI=12; MAXOP1=KORDEI+1; MAXINT=2**30 +def sgsim(df, xcol, ycol, vcol, wcol, scol, tmin, tmax, itrans, ismooth, dftrans, tcol, twtcol, zmin, zmax, ltail, ltpar, utail, utpar, nsim, + nx, xmn, xsiz, ny, ymn, ysiz, seed, ndmin, ndmax, nodmax, mults, nmult, noct, radius, radius1, sang1, + mxctx, mxcty, ktype, colocorr, sec_map, vario): + + # Parameters from sgsim.inc + MAXNST = 2 + MAXROT = 2 + UNEST = -99.0 + EPSLON = 1.0e-20 + VERSION = 2.907 + KORDEI = 12 + MAXOP1 = KORDEI+1 + MAXINT = 2**30 # Set other parameters np.random.seed(seed) nxy = nx*ny - sstrat = 0 # search data and nodes by default, turned off if unconditional + sstrat = 0 # search data and nodes by default, turned off if unconditional radsqd = radius * radius sanis1 = radius1/radius - if ktype == 4: varred = 1.0 + if ktype == 4: + varred = 1.0 # load the variogram nst = int(vario['nst']) - cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst,dtype=int) - ang = np.zeros(nst); anis = np.zeros(nst) + cc = np.zeros(nst) + aa = np.zeros(nst) + it = np.zeros(nst, dtype=int) + ang = np.zeros(nst) + anis = np.zeros(nst) - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; - aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + c0 = vario['nug'] + cc[0] = vario['cc1'] + it[0] = vario['it1'] + ang[0] = vario['azi1'] + aa[0] = vario['hmaj1'] + anis[0] = vario['hmin1']/vario['hmaj1'] if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; - aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; + cc[1] = vario['cc2'] + it[1] = vario['it2'] + ang[1] = vario['azi2'] + aa[1] = vario['hmaj2'] + anis[1] = vario['hmin2']/vario['hmaj2'] # Set the constants MAXCTX = mxctx MAXCTY = mxcty MAXCXY = MAXCTX * MAXCTY - MAXX = nx - MAXY = ny - MAXZ = 1 # assuming 2D for now - MXY = MAXX * MAXY - if MXY < 100: MXY = 100 + MAXX = nx + MAXY = ny + MAXZ = 1 # assuming 2D for now + MXY = MAXX * MAXY + if MXY < 100: + MXY = 100 MAXNOD = nodmax MAXSAM = ndmax MAXKR1 = MAXNOD + MAXSAM + 1 # print('MAXKR1'); print(MAXKR1) MAXKR2 = MAXKR1 * MAXKR1 MAXSBX = 1 - if nx > 1: + if nx > 1: MAXSBX = int(nx/2) - if MAXSBX > 50: MAXSBX=50 + if MAXSBX > 50: + MAXSBX = 50 MAXSBY = 1 if ny > 1: MAXSBY = int(ny/2) - if MAXSBY > 50: MAXSBY=50 + if MAXSBY > 50: + MAXSBY = 50 MAXSBZ = 1 MAXSB = MAXSBX*MAXSBY*MAXSBZ @@ -3214,12 +3420,14 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # Declare arrays dist = np.zeros(ndmax) - nums = np.zeros(ndmax,dtype = int) + nums = np.zeros(ndmax, dtype=int) # Perform some quick checks - if nx > MAXX or ny> MAXY: - print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') - print(' you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.') + if nx > MAXX or ny > MAXY: + print('ERROR: available grid size: ' + str(MAXX) + + ',' + str(MAXY) + ',' + str(MAXZ) + '.') + print(' you have asked for : ' + str(nx) + + ',' + str(ny) + ',' + str(nz) + '.') return sim if ltail != 1 and ltail != 2: print('ERROR invalid lower tail option ' + str(ltail)) @@ -3243,25 +3451,29 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc return sim # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) - ndmax 
= min(ndmax,nd) + ndmax = min(ndmax, nd) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values vr_orig = np.copy(vr) # print('size of data extract'); print(len(vr)) - wt = []; wt = np.array(wt) + wt = [] + wt = np.array(wt) if wcol > -1: wt = df_extract[wcol].values else: wt = np.ones(nd) - sec = []; sec = np.array(sec) + sec = [] + sec = np.array(sec) if scol > -1: sec = df_extract[scol].values if itrans == 1: if ismooth == 1: - dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] + dftrans_extract = dftrans.loc[( + dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] ntr = len(dftrans_extract) vrtr = dftrans_extrac[tcol].values if twtcol > -1: @@ -3274,85 +3486,94 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc vrgtr = np.copy(wt) twt = np.sum(vrgtr) # sort - vrtr,vrgtr = dsortem(0,ntr,vrtr,2,b=vrgtr) + vrtr, vrgtr = dsortem(0, ntr, vrtr, 2, b=vrgtr) # Compute the cumulative probabilities and write transformation table - twt = max(twt,EPSLON) + twt = max(twt, EPSLON) oldcp = 0.0 - cp = 0.0 + cp = 0.0 # print('ntr'); print(ntr) - for j in range(0,ntr): - cp = cp + vrgtr[j]/twt - w = (cp + oldcp)*0.5 + for j in range(0, ntr): + cp = cp + vrgtr[j]/twt + w = (cp + oldcp)*0.5 vrg = gauinv(w) - oldcp = cp + oldcp = cp # Now, reset the weight to the normal scores value: vrgtr[j] = vrg twt = np.sum(wt) # Normal scores transform the data - for id in range(0,nd): + for id in range(0, nd): if itrans == 1: vrr = vr[id] - j = dlocate(vrtr,1,nd,vrr) - j = min(max(0,j),(nd-2)) - vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) - if vrg < vrgtr[0]: vrg = vrgtr[0] - if(vrg > vrgtr[nd-1]): vrg = vrgtr[nd-1] + j = dlocate(vrtr, 1, nd, vrr) + j = min(max(0, j), (nd-2)) + vrg = dpowint(vrtr[j], vrtr[j+1], vrgtr[j], + vrgtr[j+1], vrr, 1.0) + if vrg < vrgtr[0]: + vrg = vrgtr[0] + if(vrg > vrgtr[nd-1]): + vrg = vrgtr[nd-1] vr[id] = vrg - weighted_stats_orig = DescrStatsW(vr_orig,weights=wt) + weighted_stats_orig = DescrStatsW(vr_orig, weights=wt) orig_av = weighted_stats_orig.mean orig_ss = weighted_stats_orig.var - weighted_stats = DescrStatsW(vr,weights=wt) + weighted_stats = DescrStatsW(vr, weights=wt) av = weighted_stats.mean ss = weighted_stats.var print('\n Data for SGSIM: Number of acceptable data = ' + str(nd)) - print(' Number trimmed = ' + str(len(df)- nd)) - print(' Weighted Average = ' + str(round(orig_av,4))) - print(' Weighted Variance = ' + str(round(orig_ss,4))) - print(' Weighted Transformed Average = ' + str(round(av,4))) - print(' Weighted Transformed Variance = ' + str(round(ss,4))) + print(' Number trimmed = ' + str(len(df) - nd)) + print(' Weighted Average = ' + + str(round(orig_av, 4))) + print(' Weighted Variance = ' + + str(round(orig_ss, 4))) + print(' Weighted Transformed Average = ' + str(round(av, 4))) + print(' Weighted Transformed Variance = ' + str(round(ss, 4))) # Read in secondary data sim = np.random.rand(nx*ny) index = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): sim[index] = index - lvm = []; lvm = np.array(lvm) + lvm = [] + lvm = np.array(lvm) if ktype >= 2: #lvm = np.copy(sec_map.flatten()) ind = 0 lvm = np.zeros(nxy) - for iy in range(0,ny): - for ix in range(0,nx): - lvm[ind] = sec_map[ny-iy-1,ix] + for iy in range(0, ny): + for ix in range(0, nx): + lvm[ind] = sec_map[ny-iy-1, ix] ind = ind + 1 if ktype == 2 and itrans == 1: - for ixy in range(0,nxy): -# Do we to transform the secondary variable for a local mean? 
+ for ixy in range(0, nxy): + # Do we to transform the secondary variable for a local mean? vrr = lvm[ixy] - j = dlocate(vrtr,1,ntr,vrr) - j = min(max(0,j),(ntr-2)) - vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) - if vrg < vrgtr[0]: vrg = vrgtr[0] - if(vrg > vrgtr[ntr-1]): vrg = vrgtr[nd-1] + j = dlocate(vrtr, 1, ntr, vrr) + j = min(max(0, j), (ntr-2)) + vrg = dpowint(vrtr[j], vrtr[j+1], vrgtr[j], + vrgtr[j+1], vrr, 1.0) + if vrg < vrgtr[0]: + vrg = vrgtr[0] + if(vrg > vrgtr[ntr-1]): + vrg = vrgtr[nd-1] lvm[ixy] = vrg av = np.average(lvm) ss = np.var(lvm) print(' Secondary Data: Number of data = ' + str(nx*ny)) - print(' Equal Weighted Average = ' + str(round(av,4))) - print(' Equal Weighted Variance = ' + str(round(ss,4))) + print(' Equal Weighted Average = ' + str(round(av, 4))) + print(' Equal Weighted Variance = ' + str(round(ss, 4))) # Do we need to work with data residuals? (Locally Varying Mean) if ktype == 2: sec = np.zeros(nd) - for idd in range(0,nd): - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) + for idd in range(0, nd): + ix = getindex(nx, xmn, xsiz, x[idd]) + iy = getindex(ny, ymn, ysiz, y[idd]) index = ix + (iy-1)*nx sec[idd] = lvm[index] # Calculation of residual moved to krige subroutine: vr(i)=vr(i)-sec(i) @@ -3360,10 +3581,10 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # Do we need to get an external drift attribute for the data? if ktype == 3: - for idd in range(0,nd): + for idd in range(0, nd): if sec[i] != UNEST: - ix = getindx(nx,xmn,xsiz,x[idd]) - iy = getindx(ny,ymn,ysiz,y[idd]) + ix = getindx(nx, xmn, xsiz, x[idd]) + iy = getindx(ny, ymn, ysiz, y[idd]) ind = ix + (iy)*nx sec[ind] = lvm[ind] @@ -3371,53 +3592,57 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc if ktype == 4: order_sec = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): order_sec[ixy] = ind ind = ind + 1 print(' Transforming Secondary Data with') print(' variance reduction of ' + str(varred)) - lvm,order_sec = dsortem(0,nxy,lvm,2,b=order_sec) + lvm, order_sec = dsortem(0, nxy, lvm, 2, b=order_sec) oldcp = 0.0 - cp = 0.0 - for i in range(0,nxy): - cp = cp + (1.0/(nxy)) - w = (cp + oldcp)/2.0 + cp = 0.0 + for i in range(0, nxy): + cp = cp + (1.0/(nxy)) + w = (cp + oldcp)/2.0 lvm[i] = gauinv(w) lvm[i] = lvm[i] * varred - oldcp = cp - order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm) + oldcp = cp + order_sec, lvm = dsortem(0, nxy, order_sec, 2, b=lvm) # return np.reshape(lvm,(ny,nx)) # check the transform # Set up the rotation/anisotropy matrices that are needed for the # variogram and search. 
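For orientation, what the rotation/anisotropy matrices set up below encode in 2D: the lag vector is rotated from GSLIB's azimuth convention (clockwise from north) onto the variogram's major/minor axes, and the minor-axis component is stretched by 1/anis before the distance is measured. A standalone sketch of the equivalent scalar computation (aniso_distance is a hypothetical helper, not the library's setrot/sqdist2):

    import math

    def aniso_distance(dx, dy, azi_deg, anis):
        # Rotate the lag (dx, dy) onto the major/minor axes, then apply anisotropy.
        alpha = math.radians(90.0 - azi_deg)    # azimuth (from north) -> math angle
        cosa, sina = math.cos(alpha), math.sin(alpha)
        dmaj = cosa * dx + sina * dy            # component along the major direction
        dmin = (-sina * dx + cosa * dy) / anis  # minor component, stretched by 1/anis
        return math.sqrt(dmaj * dmaj + dmin * dmin)

    # With anis = hmin1/hmaj1 as loaded above, a unit lag along azimuth 90 (east)
    # keeps its length: aniso_distance(1.0, 0.0, 90.0, 0.5) == 1.0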
print('Setting up rotation matrices for variogram and search') if nst == 1: - rotmat = setrot(ang[0],ang[0],sang1,anis[0],anis[0],sanis1,nst,MAXROT=2) + rotmat = setrot(ang[0], ang[0], sang1, anis[0], + anis[0], sanis1, nst, MAXROT=2) else: - rotmat = setrot(ang[0],ang[1],sang1,anis[0],anis[1],sanis1,nst,MAXROT=2) - isrot = 2 # search rotation is appended as 3rd + rotmat = setrot(ang[0], ang[1], sang1, anis[0], + anis[1], sanis1, nst, MAXROT=2) + isrot = 2 # search rotation is appended as 3rd - rotmat_2d, maxcov = setup_rotmat2(c0,nst,it,cc,ang) # will use one in the future + rotmat_2d, maxcov = setup_rotmat2( + c0, nst, it, cc, ang) # will use one in the future # print('MaxCov = ' + str(maxcov)) # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, nd)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Set up the covariance table and the spiral search: - cov_table,tmp,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,rotmat,radsqd) + cov_table, tmp, order, ixnode, iynode, nlooku, nctx, ncty = ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MXY, + xsiz, ysiz, isrot, nx, ny, nst, c0, cc, aa, it, ang, anis, rotmat, radsqd) # print('Covariance Table'); print(cov_table) # MAIN LOOP OVER ALL THE SIMULAUTIONS: - for isim in range(0,nsim): + for isim in range(0, nsim): -# Work out a random path for this realization: + # Work out a random path for this realization: sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): order[ixy] = ind ind = ind + 1 @@ -3425,35 +3650,37 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # somewhat arbitrary): if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) + for imult in range(0, nmult): + nny = int(max(1, ny/((imult+1)*4))) + nnx = int(max(1, nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 + jy = 1 + jx = 1 + for iy in range(0, nny): + if nny > 0: + jy = iy*(imult+1)*4 + for ix in range(0, nnx): + if nnx > 0: + jx = ix*(imult+1)*4 index = jx + (jy-1)*nx sim[index] = sim[index] - (imult+1) # Initialize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) + sim, order = dsortem(0, nxy, sim, 2, b=order) sim.fill(UNEST) print('Working on realization number ' + str(isim)) # Assign the data to the closest grid node: TINY = 0.0001 - for idd in range(0,nd): -# print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) + for idd in range(0, nd): + # print('data'); print(x[idd],y[idd]) + ix = getindex(nx, xmn, xsiz, x[idd]) + iy = getindex(ny, ymn, ysiz, y[idd]) ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) test = abs(xx-x[idd]) + abs(yy-y[idd]) @@ -3468,16 +3695,18 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc sim[ind] = id2 # Assign a flag so that this node does not get 
simulated: - if sstrat == 0 and test <= TINY: sim[ind]=10.0*UNEST + if sstrat == 0 and test <= TINY: + sim[ind] = 10.0*UNEST # Now, enter data values into the simulated grid: - for ind in range(0,nxy): + for ind in range(0, nxy): idd = int(sim[ind]+0.5) - if idd > 0: sim[ind] = vr[idd] - irepo = max(1,min((nxy/10),10000)) + if idd > 0: + sim[ind] = vr[idd] + irepo = max(1, min((nxy/10), 10000)) # MAIN LOOP OVER ALL THE NODES: - for ind in range(0,nxy): + for ind in range(0, nxy): if (int(ind/irepo)*irepo) == ind: print(' currently on node ' + str(ind)) @@ -3485,12 +3714,13 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # not been assigned a value already: index = int(order[ind]+0.5) - if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) - ix = index - (iy)*nx + if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): + continue + iy = int((index)/nx) + ix = index - (iy)*nx xx = xmn + (ix)*xsiz yy = ymn + (iy)*ysiz - current_node = (yy,xx) + current_node = (yy, xx) # print('Current_node'); print(current_node) # Now, we'll simulate the point ix,iy,iz. First, get the close data @@ -3499,29 +3729,33 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # simulated grid nodes: if sstrat == 0: -# print('searching for nearest data') + # print('searching for nearest data') na = -1 # accounting for 0 as first index if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist = np.zeros(1) + nums = np.zeros(1) + # use kd tree for fast nearest data search + dist[0], nums[0] = tree.query(current_node, ndmax) else: - dist, nums = tree.query(current_node,ndmax) # remove any data outside search radius # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) - nums = nums[dist (UNEST+EPSLON): - simval = backtr_value(simval,vrtr,vrgtr,zmin,zmax,ltail,ltpar,utail,utpar) - if simval < zmin: simval = zmin - if simval > zmax: simval = zmax + simval = backtr_value( + simval, vrtr, vrgtr, zmin, zmax, ltail, ltpar, utail, utpar) + if simval < zmin: + simval = zmin + if simval > zmax: + simval = zmax sim[ind] = simval # print('simulated value = ' + str(sim[ind]) + ' at location index = ' + str(ind)) - av = av / max(ne,1.0) - ss =(ss / max(ne,1.0)) - av * av + av = av / max(ne, 1.0) + ss = (ss / max(ne, 1.0)) - av * av print('\n Realization ' + str(isim) + ': number = ' + str(ne)) - print(' mean = ' + str(round(av,4)) + ' (close to 0.0?)') - print(' variance = ' + str(round(ss,4)) + ' (close to gammabar(V,V)? approx. 1.0)') + print(' mean = ' + + str(round(av, 4)) + ' (close to 0.0?)') + print(' variance = ' + + str(round(ss, 4)) + ' (close to gammabar(V,V)? approx. 
1.0)') # END MAIN LOOP OVER SIMULATIONS: - sim_out = np.zeros((ny,nx)) - for ind in range(0,nxy): - iy = int((ind)/nx) - ix = ind - (iy)*nx - sim_out[ny-iy-1,ix] = sim[ind] + sim_out = np.zeros((ny, nx)) + for ind in range(0, nxy): + iy = int((ind)/nx) + ix = ind - (iy)*nx + sim_out[ny-iy-1, ix] = sim[ind] return sim_out -def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin, - ndmax,nodmax,mults,nmult,noct,radius,ktype,vario): +def sisim(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, zmin, zmax, ltail, ltpar, middle, mpar, utail, utpar, nx, xmn, xsiz, ny, ymn, ysiz, seed, ndmin, + ndmax, nodmax, mults, nmult, noct, radius, ktype, vario): """A 2D version of GSLIB's SISIM Indicator Simulation program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). WARNING: only tested for categorical ktype 0, 1 and 2 (locally variable proportion). @@ -3653,40 +3895,42 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin EPSLON = 1.0e-20 VERSION = 0.001 np.random.seed(seed) - colocorr = 0.0 # no collocated cokriging - lvm = 0 # no kriging with a locally variable mean - sec = []; sec = np.array(sec) # no secondary data - ng = 0 # no tabulated values + colocorr = 0.0 # no collocated cokriging + lvm = 0 # no kriging with a locally variable mean + sec = [] + sec = np.array(sec) # no secondary data + ng = 0 # no tabulated values # Find the needed parameters: PMX = 9999.9 MAXSAM = ndmax + 1 MAXEQ = MAXSAM + 1 - nxy = nx*ny + nxy = nx*ny mik = 0 # full indicator kriging use_trend = False - trend1d = np.zeros((nxy,1)) # no trend make a dummy trend + trend1d = np.zeros((nxy, 1)) # no trend make a dummy trend if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: - trend1d = np.zeros((nxy,ncut)) + trend1d = np.zeros((nxy, ncut)) use_trend = True index = 0 - for iy in range(0,ny): - for ix in range(0,nx): - for ic in range(0,ncut): - trend1d[index,ic] = trend[ny-iy-1,ix,ic] # copy trend + for iy in range(0, ny): + for ix in range(0, nx): + for ic in range(0, ncut): + trend1d[index, ic] = trend[ny-iy-1, ix, ic] # copy trend index = index + 1 MAXORD = nxy MAXNOD = nodmax - cnodeiv = np.zeros((ncut+1,MAXNOD)) + cnodeiv = np.zeros((ncut+1, MAXNOD)) tmp = np.zeros(MAXORD) - sstrat = 0 # search data and nodes by default, turned off if unconditional - sang1 = 0 # using isotropic search now + sstrat = 0 # search data and nodes by default, turned off if unconditional + sang1 = 0 # using isotropic search now sanis1 = 1.0 # No covariance lookup table - mxctx = int(radius/xsiz)*2+1; mxcty = int(radius/xsiz)*2+1 + mxctx = int(radius/xsiz)*2+1 + mxcty = int(radius/xsiz)*2+1 # print('cov table / spiral search nx, ny '); print(mxctx); print(mxcty) MAXCTX = mxctx MAXCTY = mxcty @@ -3704,45 +3948,55 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin if nx > 1: MAXSBX = int(nx/2) if MAXSBX > 50: - MAXSBX=50 + MAXSBX = 50 MAXSBY = 1 if ny > 1: MAXSBY = int(ny/2) if MAXSBY > 50: - MAXSBY=50 + MAXSBY = 50 # print('ncut'); print(ncut) # load the variogram MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST)) - aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int) - ang = np.zeros((ncut,MAXNST)); anis = np.zeros((ncut,MAXNST)) + nst = np.zeros(ncut, dtype=int) + c0 = 
np.zeros(ncut) + cc = np.zeros((ncut, MAXNST)) + aa = np.zeros((ncut, MAXNST), dtype=int) + it = np.zeros((ncut, MAXNST), dtype=int) + ang = np.zeros((ncut, MAXNST)) + anis = np.zeros((ncut, MAXNST)) # print('varios - 1 vario'); print(vario[1]) - for icut in range(0,ncut): -# print('icut'); print(icut) + for icut in range(0, ncut): + # print('icut'); print(icut) nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1']; - ang[icut,0] = vario[icut]['azi1']; - aa[icut,0] = vario[icut]['hmaj1']; anis[icut,0] = vario[icut]['hmin1']/vario[icut]['hmaj1']; + c0[icut] = vario[icut]['nug'] + cc[icut, 0] = vario[icut]['cc1'] + it[icut, 0] = vario[icut]['it1'] + ang[icut, 0] = vario[icut]['azi1'] + aa[icut, 0] = vario[icut]['hmaj1'] + anis[icut, 0] = vario[icut]['hmin1']/vario[icut]['hmaj1'] if nst[icut] == 2: - cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2']; - aa[icut,1] = vario[icut]['hmaj2']; anis[icut,1] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + cc[icut, 1] = vario[icut]['cc2'] + it[icut, 1] = vario[icut]['it2'] + ang[icut, 1] = vario[icut]['azi2'] + aa[icut, 1] = vario[icut]['hmaj2'] + anis[icut, 1] = vario[icut]['hmin2']/vario[icut]['hmaj2'] # print('check loaded cov model- icut '); print(icut) # print(cc[icut],aa[icut],it[icut],ang[icut],anis[icut]) - # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] MAXDAT = len(df_extract) nd = MAXDAT MAXCUT = ncut MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) + MAXROT = MAXNST*MAXCUT + 1 + ikout = np.zeros((nx, ny, ncut)) maxcov = np.zeros(ncut) # Allocate the needed memory: @@ -3755,8 +4009,8 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin rr = np.zeros(MAXEQ) s = np.zeros(MAXEQ) a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) + ikmap = np.zeros((nx, ny, ncut)) + vr = np.zeros((MAXDAT, MAXCUT+1)) nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) @@ -3764,13 +4018,13 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) + ikout = np.zeros((nx, ny, ncut)) x = df_extract[xcol].values y = df_extract[ycol].values v = df_extract[vcol].values - MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used + MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used gcut = np.zeros(MAXTAB) # The indicator data are constructed knowing the thresholds and the @@ -3778,32 +4032,35 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('ncut'); print(ncut) if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + vr[:, icut] = np.where( + (v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v + for icut in range(0, ncut): + vr[:, icut] = np.where(v <= thresh[icut], '1', '0') + vr[:, ncut] = v # print('loaded data '); print(vr) # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, 
compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, MAXDAT)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics of the input data - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() + avg = vr[:, ncut].mean() + stdev = vr[:, ncut].std() ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() + vrmin = vr[:, ncut].min() + vrmax = vr[:, ncut].max() print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - actloc = np.zeros(MAXDAT, dtype = int) # need to set up data at node locations - for i in range(1,MAXDAT): + # need to set up data at node locations + actloc = np.zeros(MAXDAT, dtype=int) + for i in range(1, MAXDAT): actloc[i] = i # Set up the rotation/anisotropy matrices that are needed for the @@ -3812,24 +4069,27 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[ic],cc[ic],ang[ic],9999.9) + for ic in range(0, ncut): + rotmat_temp, maxcov[ic] = setup_rotmat( + c0[ic], int(nst[ic]), it[ic], cc[ic], ang[ic], 9999.9) rotmat.append(rotmat_temp) - #return rotmat + # return rotmat # Set up the covariance table and the spiral search based just on the first variogram # This is ok as we are not using the covariance look up table, just spiral search for previous nodes - isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here + isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here # print('ang[0]'); print(ang[0]) if nst[0] == 1: - global_rotmat = setrot(ang[0,0],ang[0,0],sang1,anis[0,0],anis[0,0],sanis1,nst[0],MAXROT=2) + global_rotmat = setrot( + ang[0, 0], ang[0, 0], sang1, anis[0, 0], anis[0, 0], sanis1, nst[0], MAXROT=2) else: - global_rotmat = setrot(ang[0,0],ang[1,0],sang1,anis[0,0],anis[1,0],sanis1,nst[0],MAXROT=2) + global_rotmat = setrot( + ang[0, 0], ang[1, 0], sang1, anis[0, 0], anis[1, 0], sanis1, nst[0], MAXROT=2) - cov_table,tmp2,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) + cov_table, tmp2, order, ixnode, iynode, nlooku, nctx, ncty = ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MXY, + xsiz, ysiz, isrot, nx, ny, nst[0], c0[0], cc[0], aa[0], it[0], ang[0], anis[0], global_rotmat, radsqd) # print('spiral search number nodes '); print(nlooku) @@ -3838,21 +4098,21 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin nk = 0 xk = 0.0 vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 + for icut in range(0, ncut): + nviol[icut] = 0 + aviol[icut] = 0.0 xviol[icut] = -1.0 # print('Working on the kriging') # Report on progress from time to time: if koption == 0: - nxy = nx*ny + nxy = nx*ny nloop = nxy - irepo = max(1,min((nxy/10),10000)) + irepo = max(1, min((nxy/10), 10000)) else: nloop = 10000000 - irepo = max(1,min((nd/10),10000)) + irepo = max(1, min((nd/10), 10000)) ddh = 0.0 # MAIN LOOP OVER ALL THE SIMULAUTIONS: @@ -3862,7 +4122,7 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): order[ixy] = ind ind = ind + 1 
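# The loop above just attaches the visiting indices 0..nxy-1 to one
# uniform random key per cell; the dsortem call below then sorts on the
# keys to produce the random path. A compact NumPy equivalent -- a
# sketch, not the GSLIB translation itself:
import numpy as np

def random_path(nxy, seed=None):
    rng = np.random.default_rng(seed)
    return rng.permutation(nxy)  # cell indices in a random visiting order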
@@ -3870,22 +4130,24 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # somewhat arbitrary): if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) + for imult in range(0, nmult): + nny = int(max(1, ny/((imult+1)*4))) + nnx = int(max(1, nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 + jy = 1 + jx = 1 + for iy in range(0, nny): + if nny > 0: + jy = iy*(imult+1)*4 + for ix in range(0, nnx): + if nnx > 0: + jx = ix*(imult+1)*4 index = jx + (jy-1)*nx sim[index] = sim[index] - (imult+1) # Initialize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) + sim, order = dsortem(0, nxy, sim, 2, b=order) sim.fill(UNEST) tmp.fill(0.0) print('Working on a single realization, seed ' + str(seed)) # Assign the data to the closest grid node: TINY = 0.0001 - for idd in range(0,nd): -# print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) + for idd in range(0, nd): + # print('data'); print(x[idd],y[idd]) + ix = getindex(nx, xmn, xsiz, x[idd]) + iy = getindex(ny, ymn, ysiz, y[idd]) ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) test = abs(xx-x[idd]) + abs(yy-y[idd]) @@ -3918,29 +4180,30 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # Now, enter data values into the simulated grid: - for ind in range(0,nxy): + for ind in range(0, nxy): idd = int(sim[ind]+0.5) if idd > 0: sim[ind] = vr[idd] else: tmp[ind] = sim[ind] sim[ind] = UNEST - irepo = max(1,min((nxy/10),10000)) + irepo = max(1, min((nxy/10), 10000)) # LOOP OVER ALL THE NODES: - for ind in range(0,nxy): + for ind in range(0, nxy): if (int(ind/irepo)*irepo) == ind: print(' currently on node ' + str(ind)) # Find the index on the random path, check if assigned data and get location index = int(order[ind]+0.5) - if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) - ix = index - (iy)*nx + if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): + continue + iy = int((index)/nx) + ix = index - (iy)*nx xx = xmn + (ix)*xsiz yy = ymn + (iy)*ysiz - current_node = (yy,xx) + current_node = (yy, xx) # print('Current_node'); print(current_node) # Now we'll simulate the point ix,iy,iz.
First, get the close data @@ -3949,22 +4212,25 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # simulated grid nodes: if sstrat == 0: -# print('searching for nearest data') + # print('searching for nearest data') na = -1 # accounting for 0 as first index if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist = np.zeros(1) + nums = np.zeros(1) + # use kd tree for fast nearest data search + dist[0], nums[0] = tree.query(current_node, ndmax) else: - dist, nums = tree.query(current_node,ndmax) + dist, nums = tree.query(current_node, ndmax) # remove any data outside search radius # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) - nums = nums[dist 0: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + cnodeiv[icut, :] = np.where( + (cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') - cnodeiv[ncut,:] = cnodev + for icut in range(0, ncut): + cnodeiv[icut, :] = np.where(cnodev <= thresh[icut], '1', '0') + cnodeiv[ncut, :] = cnodev # print('indicator transformed nearest nodes'); print(cnodeiv) @@ -3994,53 +4262,54 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('nums'); print(nums) # What cdf value are we looking for? - zval = UNEST + zval = UNEST cdfval = np.random.rand() # Use the global distribution? # check inputs # print('nst'); print(nst) - - if nclose + ncnode <= 0: -# print('nclose & ncnode'); print(nclose, ncnode) - zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + # print('nclose & ncnode'); print(nclose, ncnode) + zval = beyond(ivtype, ncut, thresh, gcdf, ng, gcut, gcdf, zmin, + zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval) else: -# print('kriging') -# Estimate the local distribution by indicator kriging: -# print('maxcov'); print(maxcov) - for ic in range(0,ncut): -# print('check kriging cov model- icut '); print(ic) -# print('node data values for kriging'); print(cnodev) -# print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) - #ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) + # print('kriging') + # Estimate the local distribution by indicator kriging: + # print('maxcov'); print(maxcov) + for ic in range(0, ncut): + # print('check kriging cov model- icut '); print(ic) + # print('node data values for kriging'); print(cnodev) + # print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) + # ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) if ktype == 0: gmean = gcdf[ic] elif ktype == 2: - gmean = trend1d[index,ic] + gmean = trend1d[index, ic] else: - gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used + gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used # print('gmean'); print(gmean) - ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, - icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], - rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) + ccdf[ic], cstdev = ikrige(ix, iy, 
nx, ny, xx, yy, ktype, x, y, vr[:, ic], sec, colocorr, gmean, trend[:, ic], nums, cov_table, nctx, ncty, + icnode, ixnode, iynode, cnodeiv[ic], cnodex, cnodey, nst[ic], c0[ + ic], 9999.9, cc[ic], aa[ic], it[ic], ang[ic], anis[ic], + rotmat[ic], maxcov[ic], MAXCTX, MAXCTY, MAXKR1, MAXKR2) # print('ccdf'); print(ccdf) # Correct order relations: - ccdfo = ordrel(ivtype,ncut,ccdf) + ccdfo = ordrel(ivtype, ncut, ccdf) # Draw from the local distribution: - zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + zval = beyond(ivtype, ncut, thresh, ccdfo, ng, gcut, gcdf, zmin, + zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval) sim[index] = zval # print('zval'); print(zval) # END MAIN LOOP OVER SIMULATIONS: - sim_out = np.zeros((ny,nx)) - for ind in range(0,nxy): - iy = int((ind)/nx) - ix = ind - (iy)*nx - sim_out[ny-iy-1,ix] = sim[ind] + sim_out = np.zeros((ny, nx)) + for ind in range(0, nxy): + iy = int((ind)/nx) + ix = ind - (iy)*nx + sim_out[ny-iy-1, ix] = sim[ind] return sim_out @@ -4095,15 +4364,24 @@ def kb2d_locations( # load the variogram nst = vario['nst'] - cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) - ang = np.zeros(nst); anis = np.zeros(nst) + cc = np.zeros(nst) + aa = np.zeros(nst) + it = np.zeros(nst) + ang = np.zeros(nst) + anis = np.zeros(nst) - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; - aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + c0 = vario['nug'] + cc[0] = vario['cc1'] + it[0] = vario['it1'] + ang[0] = vario['azi1'] + aa[0] = vario['hmaj1'] + anis[0] = vario['hmin1']/vario['hmaj1'] if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; - aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; + cc[1] = vario['cc2'] + it[1] = vario['it2'] + ang[1] = vario['azi2'] + aa[1] = vario['hmaj2'] + anis[1] = vario['hmin2']/vario['hmaj2'] # Allocate the needed memory: xa = np.zeros(MAXSAM) @@ -4119,9 +4397,10 @@ def kb2d_locations( vlist = np.zeros(len(df_loc)) # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) - ndmax = min(ndmax,nd) + ndmax = min(ndmax, nd) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values @@ -4133,9 +4412,10 @@ def kb2d_locations( vr_loc = df_loc[vcol].values # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, nd)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics for the data after trimming avg = vr.mean() @@ -4145,16 +4425,17 @@ def kb2d_locations( vrmax = vr.max() # Initialize accumulators: - cbb = 0.0 + cbb = 0.0 rad2 = radius*radius # Calculate Block Covariance. Check for point kriging. 
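# With one discretization point per block (point kriging, nxdis = nydis
# = 1), the block covariance computed below reduces to the covariance
# model at zero lag, i.e. the total sill. A sketch under that assumption
# (hypothetical helper, not part of the translation):
def point_block_covariance(c0, cc):
    # C(0) = nugget plus the sum of the structure contributions
    return c0 + sum(cc)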
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX) + cov = cova2(0.0, 0.0, 0.0, 0.0, nst, c0, PMX, cc, + aa, it, ang, anis, rotmat, maxcov) # Keep this value to use for the unbiasedness constraint: unbias = cov cbb = cov - first = False + first = False # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: nk = 0 @@ -4165,65 +4446,70 @@ def kb2d_locations( print('Working on location ' + str(idata)) xloc = x_loc[idata] yloc = y_loc[idata] - current_node = (yloc,xloc) + current_node = (yloc, xloc) # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + # use kd tree for fast nearest data search + dist, nums = tree.query(current_node, ndmax) # remove any data outside search radius na = len(dist) - nums = nums[dist Date: Fri, 1 Nov 2019 18:36:42 -0500 Subject: [PATCH 03/10] remove parallelization --- geostatspy/geostats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 02f006d..8d35d62 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -1879,7 +1879,7 @@ def gamv(df, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill) -@jit(**JITKW, parallel=True) +@jit(**JITKW, parallel=False) def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill): # Summary statistics for the data after trimming @@ -1910,7 +1910,7 @@ def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, at return dis, vario, npp -@jit(**JITKW, parallel=True) +@jit(**JITKW, parallel=False) def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): """Calculate the variogram by looping over combinatorial of data pairs. :param x: x values @@ -2676,7 +2676,7 @@ def kb2d( _weights = None -@jit(**JITKW, parallel=False) # numba crashed on parallel=True +@jit(**JITKW, parallel=False) # numba crashed on parallel=False def _kb2d_jit( tree, nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, From ca46a0a5d2950083c174cdc5513d8ef9caeab22e Mon Sep 17 00:00:00 2001 From: David Fulford Date: Tue, 21 Jan 2020 10:30:33 -0600 Subject: [PATCH 04/10] some cleanup of testing code --- geostatspy/geostats.py | 220 +++++++++++++++++++---------------------- 1 file changed, 100 insertions(+), 120 deletions(-) diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 8d35d62..971a20d 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -2326,11 +2326,6 @@ def nscore( return ns, vr, wt_ns -_nums = [] -_dists = [] -_weights = [] - - def kb2d( df, xcol, @@ -2448,12 +2443,6 @@ def kb2d( vrmin = vr.min() vrmax = vr.max() - global _nums - global _dists - global _weights - _nums = [] - _dists = [] - _weights = [] # Set up the discretization points per block. 
Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the @@ -2655,10 +2644,6 @@ def kb2d( ak = ak + est vk = vk + est*est - if xloc == 21 and yloc == 102: - _nums.append(nums.copy()) - _dists.append(dist.copy()) - _weights.append(s) # END OF MAIN LOOP OVER ALL THE BLOCKS: @@ -2671,9 +2656,106 @@ def kb2d( return kmap, vmap -_nums = None -_dists = None -_weights = None +def kb2d_jit( + df, + xcol, + ycol, + vcol, + tmin, + tmax, + nx, + xmn, + xsiz, + ny, + ymn, + ysiz, + nxdis, + nydis, + ndmin, + ndmax, + radius, + ktype, + skmean, + vario, +): + """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the + original Fortran to Python by Michael Pyrcz, the University of Texas at + Austin (Jan, 2019). + :param df: pandas DataFrame with the spatial data + :param xcol: name of the x coordinate column + :param ycol: name of the y coordinate column + :param vcol: name of the property column + :param tmin: property trimming limit + :param tmax: property trimming limit + :param nx: definition of the grid system (x axis) + :param xmn: definition of the grid system (x axis) + :param xsiz: definition of the grid system (x axis) + :param ny: definition of the grid system (y axis) + :param ymn: definition of the grid system (y axis) + :param ysiz: definition of the grid system (y axis) + :param nxdis: number of discretization points for a block + :param nydis: number of discretization points for a block + :param ndmin: minimum number of data points to use for kriging a block + :param ndmax: maximum number of data points to use for kriging a block + :param radius: maximum isotropic search radius + :param ktype: + :param skmean: + :param vario: + :return: + """ +# Load the data + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] + nd = len(df_extract) + x = df_extract[xcol].values + y = df_extract[ycol].values + vr = df_extract[vcol].values + +# Make a KDTree for fast search of nearest neighbours + # dp = list((y[i], x[i]) for i in range(0,nd)) + data_locs = np.column_stack((y, x)) + import scipy.spatial as sp + _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) + + vario_int = Dict.empty( + key_type=nb.types.unicode_type, + value_type=nb.types.i8 + ) + + vario_float = Dict.empty( + key_type=nb.types.unicode_type, + value_type=nb.f8 + ) + + vario = copy(vario) + + vario_float['nug'] = vario.pop('nug') + vario_float['cc1'] = vario.pop('cc1') + vario_float['cc2'] = vario.pop('cc2') + + for key in vario.keys(): + vario_int[key] = vario[key] + + tree = Dict.empty( + key_type=nb.types.Tuple((nb.f8, nb.f8)), + value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])) + ) + + for iy in range(0, ny): + yloc = ymn + (iy-0)*ysiz + for ix in range(0, nx): + xloc = xmn + (ix-0)*xsiz + current_node = (yloc, xloc) + tree[current_node] = _tree.query( + current_node, ndmax, distance_upper_bound=radius) + + kmap, vmap = _kb2d_jit( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, radius, ktype, skmean, vario_int, vario_float) + + return kmap, vmap @jit(**JITKW, parallel=False) # numba crashed on parallel=False @@ -2949,108 +3031,6 @@ def _kb2d_jit( return kmap, vmap -def kb2d_jit( - df, - xcol, - ycol, - vcol, - tmin, - tmax, - nx, - xmn, - xsiz, - ny, - ymn, - ysiz, - nxdis, - nydis, - ndmin, - ndmax, - radius, - ktype, - skmean, - vario, -): - """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the - original 
Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (Jan, 2019). - :param df: pandas DataFrame with the spatial data - :param xcol: name of the x coordinate column - :param ycol: name of the y coordinate column - :param vcol: name of the property column - :param tmin: property trimming limit - :param tmax: property trimming limit - :param nx: definition of the grid system (x axis) - :param xmn: definition of the grid system (x axis) - :param xsiz: definition of the grid system (x axis) - :param ny: definition of the grid system (y axis) - :param ymn: definition of the grid system (y axis) - :param ysiz: definition of the grid system (y axis) - :param nxdis: number of discretization points for a block - :param nydis: number of discretization points for a block - :param ndmin: minimum number of data points to use for kriging a block - :param ndmax: maximum number of data points to use for kriging a block - :param radius: maximum isotropic search radius - :param ktype: - :param skmean: - :param vario: - :return: - """ -# Load the data - # trim values outside tmin and tmax - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] - nd = len(df_extract) - x = df_extract[xcol].values - y = df_extract[ycol].values - vr = df_extract[vcol].values - -# Make a KDTree for fast search of nearest neighbours - # dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y, x)) - import scipy.spatial as sp - _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, - copy_data=False, balanced_tree=True) - - vario_int = Dict.empty( - key_type=nb.types.unicode_type, - value_type=nb.types.i8 - ) - - vario_float = Dict.empty( - key_type=nb.types.unicode_type, - value_type=nb.f8 - ) - - vario = copy(vario) - - vario_float['nug'] = vario.pop('nug') - vario_float['cc1'] = vario.pop('cc1') - vario_float['cc2'] = vario.pop('cc2') - - for key in vario.keys(): - vario_int[key] = vario[key] - - tree = Dict.empty( - key_type=nb.types.Tuple((nb.f8, nb.f8)), - value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])) - ) - - for iy in range(0, ny): - yloc = ymn + (iy-0)*ysiz - for ix in range(0, nx): - xloc = xmn + (ix-0)*xsiz - current_node = (yloc, xloc) - tree[current_node] = _tree.query( - current_node, ndmax, distance_upper_bound=radius) - - kmap, vmap = _kb2d_jit( - tree, nd, x, y, vr, - xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, - ndmax, radius, ktype, skmean, vario_int, vario_float) - - return kmap, vmap - - def ik2d(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, ndmin, ndmax, radius, ktype, vario): """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at From 9b742cee3db568eabd50f8dc9f4bef98332ca84e Mon Sep 17 00:00:00 2001 From: David Fulford Date: Thu, 31 Oct 2019 14:46:34 -0500 Subject: [PATCH 05/10] accelerate ordinary kriging with numba --- geostatspy/geostats.py | 4014 ++++++++++++++++++---------------------- 1 file changed, 1825 insertions(+), 2189 deletions(-) diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 51103ed..09c2242 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -21,10 +21,12 @@ import numpy as np # for ndarrays import numpy.linalg as linalg # for linear algebra import scipy.spatial as sp # for fast nearest neighbor search +import numba as nb # for numerical speed up from numba import jit # for numerical 
speed up +from numba.typed import Dict # typing of dictionaries from statsmodels.stats.weightstats import DescrStatsW -def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): """Back transform an entire DataFrame column with a provided transformation table and tail extrapolation. :param df: the source DataFrame :param vcol: the column with the variable to transfrom @@ -37,8 +39,8 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utail: upper tail value :param utpar: upper tail extrapolation parameter :return: TODO - """ - + """ + EPSLON=1.0e-20 nd = len(df); nt = len(vr) # number of data to transform and number of data in table backtr = np.zeros(nd) @@ -61,7 +63,7 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): cdfbt = gcum(vrgs[id]) if utail == 1: backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) - elif utail == 2: + elif utail == 2: cpow = 1.0 / utpar backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) elif utail == 4: @@ -74,7 +76,7 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): backtr[id] = powint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs[id],1.0) return backtr -def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): """Back transform a single value with a provided transformation table and tail extrapolation. :param vrgs: value to transform :param vr: the transformation table, 1D ndarray with the original values @@ -86,9 +88,9 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utail: upper tail value :param utpar: upper tail extrapolation parameter :return: TODO - """ + """ EPSLON=1.0e-20 - nt = len(vr) # number of data to transform + nt = len(vr) # number of data to transform # Value in the lower tail? 1=linear, 2=power, (3 and 4 are invalid): if vrgs <= vrg[0]: backtr = vr[0] @@ -106,7 +108,7 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): cdfbt = gcum(vrgs) if utail == 1: backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) - elif utail == 2: + elif utail == 2: cpow = 1.0 / utpar backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) elif utail == 4: @@ -121,18 +123,18 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): def gcum(x): """Calculate the cumulative probability of the standard normal distribution. - :param x: the value from the standard normal distribution + :param x: the value from the standard normal distribution :return: TODO - """ - + """ + z = x - if z < 0: + if z < 0: z = -z t = 1./(1.+ 0.2316419*z) gcum = t*(0.31938153 + t*(-0.356563782 + t*(1.781477937 + t*(-1.821255978 + t*1.330274429)))) e2 = 0. 
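# (For reference: the polynomial above is the Abramowitz & Stegun 26.2.17
# approximation to the standard normal CDF, accurate to roughly 7.5e-8,
# so gcum(x) should agree closely with scipy.stats.norm.cdf(x).)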
# 6 standard deviations out gets treated as infinity: - if z <= 6.: + if z <= 6.: e2 = np.exp(-z*z/2.)*0.3989422803 gcum = 1.0- e2 * gcum if x >= 0: @@ -150,10 +152,10 @@ def locate(xx,iis,iie,x): :param x: given value :return: TODO """ - + n = len(xx) # Initialize lower and upper methods: - if iis <= 0: + if iis <= 0: iis = 0 if iie >= n: iie = n-1 @@ -163,7 +165,7 @@ def locate(xx,iis,iie,x): j = iie return j # If we are not done then compute a midpoint: - while (ju-jl) > 1: + while (ju-jl) > 1: jm = int((ju+jl)/2) # Replace the lower or upper limit with the midpoint: if (xx[iie] > xx[iis]) == (x > xx[jm]): @@ -193,7 +195,7 @@ def dlocate(xx, iis, iie, x): return j def powint(xlow,xhigh,ylow,yhigh,xval,power): - """Power-based interpolator + """Power-based interpolator :param xlow: x lower interval :param xhigh: x upper interval :param ylow: y lower interval @@ -201,7 +203,7 @@ def powint(xlow,xhigh,ylow,yhigh,xval,power): :param xval: value on x :param power: power for interpolation :return: TODO - """ + """ EPSLON=1.0e-20 if (xhigh-xlow) < EPSLON: powint = (yhigh+ylow)/2.0 @@ -486,18 +488,18 @@ def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): :param y1: y coordinate of first point :param x2: x coordinate of second point :param y2: y coordinate of second point - :param ist: structure index - :param rotmat: 2d rotation matrix + :param ist: structure index + :param rotmat: 2d rotation matrix :param anis: 2D anisotropy ratio :return: TODO - """ - + """ + # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 dx1 = (dx*rotmat[0,ist] + dy*rotmat[1,ist]) dy1 = (dx*rotmat[2,ist] + dy*rotmat[3,ist])/anis[ist] - sqdist_ = (dx1*dx1+dy1*dy1) + sqdist_ = (dx1*dx1+dy1*dy1) return sqdist_ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): @@ -521,7 +523,7 @@ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): rotmat[0,1,2] = sina rotmat[0,2,1] = afac1*(-sina) rotmat[0,2,2] = afac1*(cosa) -# 2nd structure if present +# 2nd structure if present if nst > 1: if ang2 >= 0.0 and ang2 < 270.0: alpha = (90.0 - ang2) * DEG2RAD @@ -553,6 +555,7 @@ def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): # Return to calling program: return rotmat +@jit(nopython=True) def ksol_numpy(neq, a, r): """Find solution of a system of linear equations. 
:param neq: number of equations @@ -562,9 +565,10 @@ def ksol_numpy(neq, a, r): """ a = a[0: neq * neq] # trim the array a = np.reshape(a, (neq, neq)) # reshape to 2D - ainv = linalg.inv(a) # invert matrix + ainv = linalg.inv(a).copy() # invert matrix r = r[0: neq] # trim the array - s = np.matmul(ainv, r) # matrix multiplication + # s = np.matmul(ainv, r) # matrix multiplication + s = ainv @ r # matrix multiplication return s def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,global_rotmat,radsqd): @@ -580,16 +584,16 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Size of the look-up table: tmp = np.zeros(MAXXYZ) MAXORD = MAXXYZ - if (nx*ny) < MAXCXY: + if (nx*ny) < MAXCXY: MAXORD = MAXCXY - + order = np.zeros(MAXORD) nctx = int(min(((MAXCTX-1)/2),(nx-1))) ncty = int(min(((MAXCTY-1)/2),(ny-1))) - + # print('CTable check') # print('nctx ' + str(nctx) + ', ncty ' + str(ncty)) - + ixnode = np.zeros(MAXXYZ) iynode = np.zeros(MAXXYZ) covtab = np.zeros((MAXCTX,MAXCTY)) @@ -600,41 +604,41 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Now, set up the table and keep track of the node offsets that are # within the search radius: nlooku = -1 # adjusted for 0 origin - for i in range(-nctx,nctx+1): # cover entire range + for i in range(-nctx,nctx+1): # cover entire range xx = i * xsiz ic = nctx + i - for j in range(-ncty,ncty+1): # cover entire range + for j in range(-ncty,ncty+1): # cover entire range yy = j * ysiz jc = ncty + j covtab[ic,jc] = cova2(0.0,0.0,xx,yy,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) # print('cov table offset'); print(xx,yy); print(covtab[ic,jc]) hsqd = sqdist(0.0,0.0,0.0,xx,yy,0.0,MAXROT,global_rotmat) - if hsqd <= radsqd: + if hsqd <= radsqd: nlooku = nlooku + 1 # We want to search by closest variogram distance (and use the # anisotropic Euclidean distance to break ties: tmp[nlooku] = - (covtab[ic,jc] - TINY*hsqd) order[nlooku] = (jc)*MAXCTX+ic -# print('populated presort'); print(tmp,order) +# print('populated presort'); print(tmp,order) # Finished setting up the look-up table, now order the nodes such # that the closest ones, according to variogram distance, are searched # first. Note: the "loc" array is used because I didn't want to make # special allowance for 2 byte integers in the sorting subroutine: - + nlooku = nlooku + 1 # print('nlooku' + str(nlooku)); print('MAXCTX' + str(MAXCTX)) tmp, order = dsortem(0,nlooku,tmp,2,b=order) # print('populated postsort'); print(tmp,order) - for il in range(0,nlooku): + for il in range(0,nlooku): loc = int(order[il]) iy = int((loc-0)/MAXCTX) ix = loc - (iy-0)*MAXCTX iynode[il] = int(iy) ixnode[il] = int(ix) # print('populated ix, iy node list'); print(ixnode, iynode) - + return covtab,tmp,order,ixnode,iynode,nlooku,nctx,ncty def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nctx,ncty,UNEST): @@ -643,29 +647,29 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc Austin (March, 2019). Note this was simplified to 2D only. 
""" - + # Consider all the nearby nodes until enough have been found: - ncnode = 0; - icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) + ncnode = 0; + icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) cnodev = np.zeros(nodmax);cnodex = np.zeros(nodmax); cnodey = np.zeros(nodmax); - + # print('Node search at '); print(ix,iy) # print('nlooku'); print(nlooku) if noct > 0: ninoct = np.zeros(8) - for il in range(0,nlooku): + for il in range(0,nlooku): if ncnode == nodmax: return ncnode, icnode, cnodev, cnodex, cnodey i = ix + (int(ixnode[il])-nctx) j = iy + (int(iynode[il])-ncty) # print('i,j'); print(i,j) if i < 0 or j < 0: continue if i >= nx or j >= ny: continue - ind = i + (j)*nx + ind = i + (j)*nx if sim[ind] > UNEST: icnode[ncnode] = il cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin cnodey[ncnode] = ymn + (j)*ysiz - cnodev[ncnode] = sim[ind] + cnodev[ncnode] = sim[ind] # print('srchnd found at index - ' +str(ind) + ' at x and y ' + str(cnodex[ncnode]) + ',' + str(cnodey[ncnode])) # print(' ix = ' + str(i) + ' and iy = ' + str(j)) # print(' value = ' + str(sim[ind])) @@ -681,12 +685,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar EPSLON = 1.0e-20; UNEST=-1.0 # Check for both "zval" and "cdfval" defined or undefined: - ierr = 1; - if zval > UNEST and cdfva > UNEST: + ierr = 1; + if zval > UNEST and cdfva > UNEST: return -1 - if zval <= UNEST and cdfval <= UNEST: + if zval <= UNEST and cdfval <= UNEST: return - 1 - + # Handle the case of a categorical variable: if ivtype == 0: cum = 0 @@ -696,14 +700,14 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar zval = ccut[i] return zval return zval - + # Figure out what part of distribution: ipart = 0 - lower tail # ipart = 1 - middle # ipart = 2 - upper tail ierr = 0 ipart = 1 if zva > UNEST: - if zval <= ccut[0]: + if zval <= ccut[0]: ipart = 0 if zval >= ccut[nccut-1]: ipart = 2 @@ -712,10 +716,10 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar ipart = 0 if cdfval >= ccdf[nccut-1]: ipart = 2 - + # ARE WE IN THE LOWER TAIL? - if ipart == 0: + if ipart == 0: if ltail ==1: # Straight Linear Interpolation: powr = 1.0 @@ -726,12 +730,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar elif ltail == 2: # Power Model interpolation to lower limit "zmin"? - if zval > UNEST: + if zval > UNEST: cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,ltpar) else: powr = 1.0 / ltpar zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) - + # Linear interpolation between the rescaled global cdf? elif ltail == 3: if zval > UNEST: @@ -767,12 +771,12 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Error situation - unacceptable option: ierr = 2 return -1 - + # FINISHED THE LOWER TAIL, ARE WE IN THE MIDDLE? if ipart == 1: # Establish the lower and upper limits: - if zval > UNEST: + if zval > UNEST: cclow = locate(ccut,1,nccut,zval) else: cclow = locate(ccdf,1,nccut,cdfval) @@ -785,7 +789,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,powr) else: zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) - + # Power interpolation between class bounds? 
elif middle == 2: if zval > UNEST: @@ -793,14 +797,14 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: powr = 1.0 / mpar zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr) - + # Linear interpolation between the rescaled global cdf? elif middle == 3: ilow = locate(cut,ncut,1,ncut,ccut[cclow]) iupp = locate(cut,ncut,1,ncut,ccut[cchigh]) - if cut[ilow] < ccut[cclow]: + if cut[ilow] < ccut[cclow]: ilow = ilow + 1 - if cut[iupp] > ccut[cchigh]: + if cut[iupp] > ccut[cchigh]: iupp = iupp - 1 if zval > UNEST: idat = locate(cut,1,ncut,zval) @@ -821,7 +825,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: temp=powint(ccdf[cclow],ccdf[cchigh],cdf[ilow],cdf[iupp],cdfval,1.) idat = locate(cdf,1,ncut,temp) - if cut[idat] < ccut[cclow]: + if cut[idat] < ccut[cclow]: idat=idat+1 if idat <= -1 or idat >= ncut-1 or cut[idat+1] > ccut[cchigh]: zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.) @@ -836,13 +840,13 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar return -1 # FINISHED THE MIDDLE, ARE WE IN THE UPPER TAIL? - if ipart == 2: - if utail == 1: + if ipart == 2: + if utail == 1: powr = 1.0 if zval > UNEST: cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,powr) else: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) elif utail == 2: # Power interpolation to upper limit "utpar"? @@ -861,7 +865,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar ilow = locate(cut,1,ncut,ccut(nccut),ilow) if cut[idat] < zval: idat = idat + 1 - if cut[ilow] < ccut[nccut-1]: + if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear @@ -876,7 +880,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Computing Z value: Are there any data out in the tail? ilow = locate(cut,ncut,1,ncut,ccut(nccut),ilow) - if cut[ilow] < ccut[nccut-1]: + if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear @@ -886,7 +890,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar else: temp = powint(ccdf(nccut),1.0,cdf(ilow),1.0,cdfval,1.) idat = locate(cdf,ncut,1,ncut,temp) - if cut[idat] < ccut[nccut-1]: + if cut[idat] < ccut[nccut-1]: idat=idat+1 if idat >= ncut-1: zval = powint(ccdf[nccut-1],1.0,ccut[nccut-1],zmax,cdfval,1.) 
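# The upper-tail branch that follows (utail == 4) fits a hyperbolic model
# lambd = z**utpar * (1 - F(z)) at the last threshold and inverts it to
# draw tail values. A self-contained sketch of that inversion (assumed
# helper name; cdfval must lie above the last tabulated ccdf value):
import math

def hyperbolic_tail_draw(ccut_last, ccdf_last, utpar, cdfval):
    lambd = math.pow(ccut_last, utpar) * (1.0 - ccdf_last)
    # invert 1 - cdfval = lambd / zval**utpar for zval
    return math.pow(lambd / (1.0 - cdfval), 1.0 / utpar)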
@@ -898,20 +902,20 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Figure out "lambda" and required info: lambd = math.pow(ccut[nccut],utpar)*(1.0-ccdf[nccut-1]) - if zval > UNEST: + if zval > UNEST: cdfval = 1.0 - (lambd/(math.pow(zval,utpar))) else: - zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) + zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) else: # Error situation - unacceptable option: ierr = 2 return -1 - + if zval < zmin: zval = zmin - if zval > zmax: + if zval > zmax: zval = zmax # All finished - return: @@ -926,19 +930,19 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct """ EPSLON = 1.0e-20 cur_index = ix + (iy)*nx -# print('krige at grid '); print(ix,iy) +# print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) # print('Check ixnode '); print(ixnode); print(iynode) nclose = len(close) - ncnode = (icnode >= 0).sum() + ncnode = (icnode >= 0).sum() # print('In kriging, maxcov = ' + str(maxcov)) # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # print(r.shape) # Local mean if lktype == 2: @@ -959,7 +963,7 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) - + # Set up kriging matrices: iin=-1 # acocunting for 0 origin # print('krige na' + str(na)) @@ -972,13 +976,13 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - if sec.shape[0] > 1: + if sec.shape[0] > 1: vrea[j]= sec[index]; else: vrea[j] = 0.0 # added this - no effect if lktype == 2: vra[j] = vra[j] - vrea[j] else: - + # It is a previously simulated node (keep index for table look-up): # print(j) index = j-(nclose) # adjust for 0 index @@ -986,15 +990,15 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct y1 = cnodey[index] vra[j] = cnodev[index] ind = icnode[index] -# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) +# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) ix1 = ix + (int(ixnode[ind])-nctx-1) iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) index = ix1 + (iy1-1)*nx - if lktype == 2: + if lktype == 2: vrea[j]= lvm[index] vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix + for i in range(0,na): # we need the full matrix # print('kriging indice populated' + str(j) + ',' + str(i)) # Sort out the actual location of point "i" if i < nclose: @@ -1040,37 +1044,37 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - r[j] = cov +# print('ix1,iy1'); print(x1,y1) + r[j] = cov else: # Try to use the covariance look-up (if the distance is in range): # ii = nctx + 1 + (ix - ix1) # jj = ncty + 1 + (iy - 
iy1) - + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) # print('ix,iy ='); print(ix,iy) # print('ix1,iy1'); print(ix1,iy1) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 # print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) +# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: # cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - +# print('ix1,iy1'); print(x1,y1) + r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] if lktype == 1: # we need the full array iin = iin + 1 - a[iin] = 1.0 + a[iin] = 1.0 if lktype == 4: # we need the full array iin = iin + 1 - a[iin] = colocorr*r[j] + a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: for i in range(0,na): @@ -1114,12 +1118,12 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct ii = na r[ii] = colocorr rr[ii] = r[ii] -# if (sfmax-sfmin) < EPSLON: +# if (sfmax-sfmin) < EPSLON: # neq = neq - 1 # colc = False # Solve the Kriging System: -# print('neq = ' + str(neq)); +# print('neq = ' + str(neq)); # print('a'); print(a) # print('r'); print(r) # print('data'); print(vra) @@ -1129,11 +1133,11 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct else: # print('neq prior ksol' + str(neq)) s = ksol_numpy(neq,a,r) -# print('neq post ksol' + str(neq)) +# print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) -# print('r'); print(r) +# print('r'); print(r) ising = 0 # need to figure this out # print('s'); print(s) @@ -1148,13 +1152,13 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # print('kriging weights'); print(s) cmean = 0.0 # print('cbb = ' + str(cbb)) - cstdev = cbb + cstdev = cbb sumwts = 0.0 for i in range(0,na): cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] - if lktype == 1: + if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) if lktype == 2: cmean = cmean + gmean @@ -1184,25 +1188,25 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n """ EPSLON = 1.0e-20 cur_index = ix + (iy)*nx -# print('krige at grid '); print(ix,iy) +# print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) # print('Check ixnode '); print(ixnode); print(iynode) nclose = len(close) - ncnode = (icnode >= 0).sum() + ncnode = (icnode >= 0).sum() # print('In kriging, maxcov = ' + str(maxcov)) # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # print(r.shape) -# Local mean # just pass the local probability as gmean +# Local mean # just pass the local probability as gmean # if lktype == 2: # gmean = lvm[cur_index] -# keep input gmean 
otherwise +# keep input gmean otherwise # Size of the kriging system: first = False @@ -1217,7 +1221,7 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) - + # print('kriging data close'); print(close) # print('kriging node close'); print(icnode) # Set up kriging matrices: @@ -1232,13 +1236,13 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) -# if lvm.shape[0] > 1: +# if lvm.shape[0] > 1: # vrea[j]= sec[index]; # else: vrea[j] = 0.0 # added this - no effect # if lktype == 2: vra[j] = vra[j] - vrea[j] # just using local variable mean not full residual approach else: - + # It is a previously simulated node (keep index for table look-up): # print(j) index = j-(nclose) # adjust for 0 index @@ -1246,15 +1250,15 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n y1 = cnodey[index] vra[j] = cnodev[index] ind = icnode[index] -# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) +# print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) ix1 = ix + (int(ixnode[ind])-nctx-1) iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) index = ix1 + (iy1-1)*nx -# if lktype == 2: +# if lktype == 2: # vrea[j]= lvm[index] # vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix + for i in range(0,na): # we need the full matrix # print('kriging indice populated' + str(j) + ',' + str(i)) # Sort out the actual location of point "i" if i < nclose: @@ -1303,37 +1307,37 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - r[j] = cov +# print('ix1,iy1'); print(x1,y1) + r[j] = cov else: # Try to use the covariance look-up (if the distance is in range): # ii = nctx + 1 + (ix - ix1) # jj = ncty + 1 + (iy - iy1) - + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) # print('ix,iy ='); print(ix,iy) # print('ix1,iy1'); print(ix1,iy1) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 # print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) +# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: # cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) -# print('ix1,iy1'); print(x1,y1) - +# print('ix1,iy1'); print(x1,y1) + r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] if lktype == 1: # we need the full array iin = iin + 1 - a[iin] = 1.0 + a[iin] = 1.0 if lktype == 4: # we need the full array iin = iin + 1 - a[iin] = colocorr*r[j] + a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: for i in range(0,na): @@ -1377,12 +1381,12 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n ii = na r[ii] = colocorr rr[ii] = 
r[ii] -# if (sfmax-sfmin) < EPSLON: +# if (sfmax-sfmin) < EPSLON: # neq = neq - 1 # colc = False # Solve the Kriging System: -# print('Kriging equations neq = ' + str(neq)); +# print('Kriging equations neq = ' + str(neq)); # print('a'); print(a) # print('r'); print(r) # print('data'); print(vra) @@ -1392,11 +1396,11 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n else: # print('neq prior ksol' + str(neq)) s = ksol_numpy(neq,a,r) -# print('neq post ksol' + str(neq)) +# print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) -# print('r'); print(r) +# print('r'); print(r) ising = 0 # need to figure this out # print('s'); print(s) @@ -1411,13 +1415,13 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # print('kriging weights'); print(s) cmean = 0.0 # print('cbb = ' + str(cbb)) - cstdev = cbb + cstdev = cbb sumwts = 0.0 for i in range(0,na): cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] - if lktype == 1: + if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) # if lktype == 2: cmean = cmean + gmean @@ -1433,7 +1437,7 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n if lktype == 0 or lktype == 2: cmean = cmean + (1.0-sumwts)*gmean # print('cmean'); print(cmean) - + # Error message if negative variance: if cstdev < 0.0: # print('ERROR: Negative Variance: ' + str(cstdev)) @@ -1860,7 +1864,7 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): # Main loop over all pairs for i in range(0, nd): for j in range(0, nd): - + # Definition of the lag corresponding to the current pair dx = x[j] - x[i] dy = y[j] - y[i] @@ -1987,16 +1991,16 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): nd = len(df_extract) x = df_extract[xcol].values y = df_extract[ycol].values - vr = df_extract[vcol].values - + vr = df_extract[vcol].values + # Summary statistics for the data after trimming avg = vr.mean() stdev = vr.std() sills = stdev**2.0 ssq = sills vrmin = vr.min() - vrmax = vr.max() - + vrmax = vr.max() + # Initialize the summation arrays npp = np.zeros((nylag*2+1,nxlag*2+1)) gam = np.zeros((nylag*2+1,nxlag*2+1)) @@ -2006,11 +2010,11 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): tm = np.zeros((nylag*2+1,nxlag*2+1)) hv = np.zeros((nylag*2+1,nxlag*2+1)) tv = np.zeros((nylag*2+1,nxlag*2+1)) - - # First fix the location of a seed point: - for i in range(0,nd): - # Second loop over the data: - for j in range(0,nd): + + # First fix the location of a seed point: + for i in range(0,nd): + # Second loop over the data: + for j in range(0,nd): # The lag: ydis = y[j] - y[i] iyl = nylag + int(ydis/dylag) @@ -2019,7 +2023,7 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): xdis = x[j] - x[i] ixl = nxlag + int(xdis/dxlag) if ixl < 0 or ixl > nxlag*2: # acocunting for 0,...,n-1 array indexing - continue + continue # We have an acceptable pair, therefore accumulate all the statistics # that are required for the variogram: npp[iyl,ixl] = npp[iyl,ixl] + 1 # our ndarrays read from the base to top, so we flip @@ -2028,11 +2032,11 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): tv[iyl,ixl] = tm[iyl,ixl] + vr[i]*vr[i] hv[iyl,ixl] = hm[iyl,ixl] + vr[j]*vr[j] gam[iyl,ixl] = gam[iyl,ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j])) - + # Get average values for gam, hm, tm, hv, and tv, then 
compute # the correct "variogram" measure: - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): + for iy in range(0,nylag*2+1): + for ix in range(0,nxlag*2+1): if npp[iy,ix] <= minnp: gam[iy,ix] = -999. hm[iy,ix] = -999. @@ -2045,14 +2049,14 @@ def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): hm[iy,ix] = hm[iy,ix] / rnum tm[iy,ix] = tm[iy,ix] / rnum hv[iy,ix] = hv[iy,ix] / rnum - hm[iy,ix]*hm[iy,ix] - tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix] + tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix] # Attempt to standardize: if isill > 0: gamf[iy,ix] = gamf[iy,ix]/sills - for iy in range(0,nylag*2+1): - for ix in range(0,nxlag*2+1): + for iy in range(0,nylag*2+1): + for ix in range(0,nxlag*2+1): gamf[iy,ix] = gam[nylag*2-iy,ix] - nppf[iy,ix] = npp[nylag*2-iy,ix] + nppf[iy,ix] = npp[nylag*2-iy,ix] return gamf, nppf def vmodel( @@ -2064,27 +2068,27 @@ def vmodel( """GSLIB's VMODEL program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (Mar, 2019). - :param nlag: number of variogram lags + :param nlag: number of variogram lags :param xlag: size of the lags - :param axm: direction by 2D azimuth, 000 is y positive, 090 is x positive + :param axm: direction by 2D azimuth, 000 is y positive, 090 is x positive :param vario: dictionary with the variogram parameters :return: """ - + # Parameters MAXNST=4 - DEG2RAD=3.14159265/180.0 + DEG2RAD=3.14159265/180.0 MAXROT=MAXNST+1 EPSLON = 1.0e-20 VERSION= 1.01 - + # Declare arrays index = np.zeros(nlag+1) h = np.zeros(nlag+1) gam = np.zeros(nlag+1) cov = np.zeros(nlag+1) ro = np.zeros(nlag+1) - + # Load the variogram nst = vario["nst"] cc = np.zeros(nst) @@ -2092,7 +2096,7 @@ def vmodel( it = np.zeros(nst) ang = np.zeros(nst) anis = np.zeros(nst) - + c0 = vario["nug"] cc[0] = vario["cc1"] it[0] = vario["it1"] @@ -2105,14 +2109,14 @@ def vmodel( ang[1] = vario["azi2"] aa[1] = vario["hmaj2"] anis[1] = vario["hmin2"] / vario["hmaj2"] - + xoff = math.sin(DEG2RAD*azm)*xlag yoff = math.cos(DEG2RAD*azm)*xlag print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff)) - rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9) - - - xx = 0.0; yy = 0.0 + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9) + + + xx = 0.0; yy = 0.0 for il in range(0,nlag+1): index[il] = il cov[il] = cova2(0.0,0.0,xx,yy,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) @@ -2197,6 +2201,10 @@ def nscore( return ns, vr, wt_ns +_nums = [] +_dists = [] +_weights = [] + def kb2d( df, xcol, @@ -2244,31 +2252,31 @@ def kb2d( :param vario: :return: """ - + # Constants UNEST = -999. 
EPSLON = 1.0e-10 VERSION = 2.907 first = True - PMX = 9999.0 + PMX = 9999.0 MAXSAM = ndmax + 1 MAXDIS = nxdis * nydis MAXKD = MAXSAM + 1 MAXKRG = MAXKD * MAXKD - + # load the variogram nst = vario['nst'] cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) ang = np.zeros(nst); anis = np.zeros(nst) - - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; + + c0 = vario['nug']; + cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; + cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; - -# Allocate the needed memory: + +# Allocate the needed memory: xdb = np.zeros(MAXDIS) ydb = np.zeros(MAXDIS) xa = np.zeros(MAXSAM) @@ -2290,8 +2298,8 @@ def kb2d( x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values - -# Make a KDTree for fast search of nearest neighbours + +# Make a KDTree for fast search of nearest neighbours dp = list((y[i], x[i]) for i in range(0,nd)) data_locs = np.column_stack((y,x)) tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) @@ -2303,11 +2311,18 @@ def kb2d( vrmin = vr.min() vrmax = vr.max() + global _nums + global _dists + global _weights + _nums = [] + _dists = [] + _weights = [] + # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): ndb = nxdis * nydis - if ndb > MAXDIS: + if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') return kmap @@ -2315,10 +2330,10 @@ def kb2d( ydis = ysiz / max(float(nydis),1.0) xloc = -0.5*(xsiz+xdis) i = -1 # accounting for 0 as lowest index - for ix in range(0,nxdis): + for ix in range(0,nxdis): xloc = xloc + xdis yloc = -0.5*(ysiz+ydis) - for iy in range(0,nydis): + for iy in range(0,nydis): yloc = yloc + ydis i = i+1 xdb[i] = xloc @@ -2337,10 +2352,10 @@ def kb2d( if ndb <= 1: cbb = cov else: - for i in range(0,ndb): - for j in range(0,ndb): + for i in range(0,ndb): + for j in range(0,ndb): cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) - if i == j: + if i == j: cov = cov - c0 cbb = cbb + cov cbb = cbb/real(ndb*ndb) @@ -2350,22 +2365,22 @@ def kb2d( ak = 0.0 vk = 0.0 for iy in range(0,ny): - yloc = ymn + (iy-0)*ysiz + yloc = ymn + (iy-0)*ysiz for ix in range(0,nx): xloc = xmn + (ix-0)*xsiz current_node = (yloc,xloc) - + # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search # remove any data outside search radius na = len(dist) nums = nums[dist UNEST: @@ -2489,6 +2505,11 @@ def kb2d( ak = ak + est vk = vk + est*est + if xloc == 21 and yloc == 102: + _nums.append(nums.copy()) + _dists.append(dist.copy()) + _weights.append(s) + # END OF MAIN LOOP OVER ALL THE BLOCKS: if nk >= 1: @@ -2499,734 +2520,377 @@ def kb2d( return kmap, vmap -def 
ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xmn,xsiz,ny,ymn,ysiz,ndmin,ndmax,radius,ktype,vario): - - """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the - original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (March, 2019). - :param df: pandas DataFrame with the spatial data - :param xcol: name of the x coordinate column - :param ycol: name of the y coordinate column - :param vcol: name of the property column (cateogorical or continuous - note continuous is untested) - :param ivtype: variable type, 0 - categorical, 1 - continuous - :param koption: kriging option, 0 - estimation, 1 - cross validation (under construction) - :param ncut: number of categories or continuous thresholds - :param thresh: an ndarray with the category labels or continuous thresholds - :param gcdf: global CDF, not used if trend is present - :param trend: an ndarray [ny,ny,ncut] with the local trend proportions or cumulative CDF values - :param tmin: property trimming limit - :param tmax: property trimming limit - :param nx: definition of the grid system (x axis) - :param xmn: definition of the grid system (x axis) - :param xsiz: definition of the grid system (x axis) - :param ny: definition of the grid system (y axis) - :param ymn: definition of the grid system (y axis) - :param ysiz: definition of the grid system (y axis) - :param nxdis: number of discretization points for a block - :param nydis: number of discretization points for a block - :param ndmin: minimum number of data points to use for kriging a block - :param ndmax: maximum number of data points to use for kriging a block - :param radius: maximum isotropic search radius - :param ktype: kriging type, 0 - simple kriging and 1 - ordinary kriging - :param vario: list with all of the indicator variograms (sill of 1.0) in consistent order with above parameters - :return: - """ - -# Find the needed paramters: - PMX = 9999.9 +_nums = None +_dists = None +_weights = None + + +@jit(nopython=True) +def kb2d_jit_main( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, radius, ktype, skmean, vario_int, vario_float): + +# Constants + UNEST = -999. 
+ EPSLON = 1.0e-10 + VERSION = 2.907 + first = True + PMX = 9999.0 MAXSAM = ndmax + 1 - MAXEQ = MAXSAM + 1 - mik = 0 # full indicator kriging - use_trend = False - if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: use_trend = True - -# load the variogram - MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) - aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) - ang = np.zeros((MAXNST,ncut)); anis = np.zeros((MAXNST,ncut)) + MAXDIS = nxdis * nydis + MAXKD = MAXSAM + 1 + MAXKRG = MAXKD * MAXKD - for icut in range(0,ncut): - nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; - ang[0,icut] = vario[icut]['azi1']; - aa[0,icut] = vario[icut]['hmaj1']; anis[0,icut] = vario[icut]['hmin1']/vario[icut]['hmaj1']; - if nst[icut] == 2: - cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; - aa[1,icut] = vario[icut]['hmaj2']; anis[1,icut] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + ndmax = min(ndmax,nd) -# Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax - MAXDAT = len(df_extract) - MAXCUT = ncut - MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) - maxcov = np.zeros(ncut) - - # Allocate the needed memory: +# load the variogram + nst = vario_int['nst'] + cc = np.zeros(nst) + aa = np.zeros(nst) + it = np.zeros(nst) + ang = np.zeros(nst) + anis = np.zeros(nst) + +# Allocate the needed memory: + xdb = np.zeros(MAXDIS) + ydb = np.zeros(MAXDIS) xa = np.zeros(MAXSAM) ya = np.zeros(MAXSAM) vra = np.zeros(MAXSAM) - dist = np.zeros(MAXSAM) - nums = np.zeros(MAXSAM) - r = np.zeros(MAXEQ) - rr = np.zeros(MAXEQ) - s = np.zeros(MAXEQ) - a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) - - nviol = np.zeros(MAXCUT) - aviol = np.zeros(MAXCUT) - xviol = np.zeros(MAXCUT) - - ccdf = np.zeros(ncut) - ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) - - x = df_extract[xcol].values - y = df_extract[ycol].values - v = df_extract[vcol].values - -# The indicator data are constructed knowing the thresholds and the -# data value. 
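[Editor's sketch] The `tree` argument taken by kb2d_jit_main above is not a cKDTree: scipy cannot be called from numba nopython code, so the wrapper precomputes every nearest-neighbour query up front and passes the results in as a numba.typed.Dict keyed by the (y, x) grid-node coordinates (the Dict.empty construction appears in kb2d_jit further down). A minimal self-contained sketch of that pattern, with made-up sample locations, grid nodes, and search parameters, would be:

    import numba as nb
    import numpy as np
    import scipy.spatial as sp
    from numba.typed import Dict

    # Made-up data locations; in kb2d_jit these come from the trimmed DataFrame.
    y = np.random.rand(100); x = np.random.rand(100)
    _tree = sp.cKDTree(np.column_stack((y, x)))

    # Typed dict mapping (y, x) node -> (distances, indices), matching the
    # key/value types used by the patch.
    cache = Dict.empty(
        key_type=nb.types.Tuple((nb.f8, nb.f8)),
        value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])),
    )
    for node in [(0.25, 0.25), (0.75, 0.75)]:  # made-up grid nodes (y, x)
        dist, nums = _tree.query(node, 8, distance_upper_bound=0.3)
        cache[node] = (dist, nums.astype(np.int32))  # cast indices to match i4[:]

    @nb.jit(nopython=True)
    def neighbours(cache, node):
        # Inside nopython code the dict lookup stands in for tree.query().
        return cache[node]

    dist, nums = neighbours(cache, (0.25, 0.25))

Materializing the queries trades memory for speed, but it is what lets the main kriging loop compile under nopython=True.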
- - if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') - else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v - -# Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - -# Summary statistics of the input data - - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() - ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() - print('Data for IK3D: Variable column ' + str(vcol)) - print(' Number = ' + str(MAXDAT)) - ndh = MAXDAT - - actloc = np.zeros(MAXDAT, dtype = int) - for i in range(1,MAXDAT): - actloc[i] = i - -# Set up the rotation/anisotropy matrices that are needed for the -# variogram and search: - - print('Setting up rotation matrices for variogram and search') - radsqd = radius * radius - rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[:,ic],cc[:,ic],ang[:,ic],9999.9) - rotmat.append(rotmat_temp) -# Initialize accumulators: # not setup yet - nk = 0 - xk = 0.0 - vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 - xviol[icut] = -1.0 - nxy = nx*ny - print('Working on the kriging') +# dist = np.zeros(MAXSAM) +# nums = np.zeros(MAXSAM) + r = np.zeros(MAXKD) + rr = np.zeros(MAXKD) + s = np.zeros(MAXKD) + a = np.zeros(MAXKRG) + kmap = np.zeros((nx,ny)) + vmap = np.zeros((nx,ny)) -# Report on progress from time to time: - if koption == 0: - nxy = nx*ny - nloop = nxy - irepo = max(1,min((nxy/10),10000)) - else: - nloop = 10000000 - irepo = max(1,min((nd/10),10000)) - ddh = 0.0 - -# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: - for index in range(0,nloop): - - if (int(index/irepo)*irepo) == index: print(' currently on estimate ' + str(index)) - - if koption == 0: - iy = int((index)/nx) - ix = index - (iy)*nx - xloc = xmn + (ix)*xsiz - yloc = ymn + (iy)*ysiz - else: - ddh = 0.0 - # TODO: pass the cross validation value -# Find the nearest samples within each octant: First initialize the counter arrays: - na = -1 # accounting for 0 as first index - dist.fill(1.0e+20) - nums.fill(-1) - current_node = (yloc,xloc) - dist, close = tree.query(current_node,ndmax) # use kd tree for fast nearest data search - # remove any data outside search radius - close = close[dist= 1: krig = False +# Summary statistics for the data after trimming + avg = np.mean(vr) + stdev = np.std(vr) + ss = stdev ** 2.0 + vrmin = np.min(vr) + vrmax = np.max(vr) -# Identify the close data (there may be a different number of data at -# each threshold because of constraint intervals); however, if -# there are no constraint intervals then this step can be avoided. - nca = -1 - for ia in range(0,nclose): - j = int(close[ia]+0.5) - ii = actloc[j] - accept = True - if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)).lt.EPSLON: accept = False - if accept: - nca = nca + 1 - vra[nca] = vr[ii,ic] - xa[nca] = x[j] - ya[nca] = y[j] +# Set up the discretization points per block. 
Figure out how many +# are needed, the spacing, and fill the xdb and ydb arrays with the +# offsets relative to the block center (this only gets done once): + ndb = nxdis * nydis + if ndb > MAXDIS: + print('ERROR KB2D: Too many discretization points ') + print(' Increase MAXDIS or lower n[xy]dis') +# return kmap + xdis = xsiz / max(float(nxdis), 1.0) + ydis = ysiz / max(float(nydis), 1.0) + xloc = -0.5 * (xsiz + xdis) + i = -1 # accounting for 0 as lowest index + for ix in range(0, nxdis): + xloc = xloc + xdis + yloc = -0.5 * (ysiz+ydis) + for iy in range(0, nydis): + yloc = yloc + ydis + i += 1 + xdb[i] = xloc + ydb[i] = yloc -# If there are no samples at this threshold then use the global cdf: - if nca == -1: - if use_trend: - ccdf[ic] = trend[ny-iy-1,ix,ic] +# Initialize accumulators: + cbb = 0.0 + rad2 = radius * radius + +# Calculate Block Covariance. Check for point kriging. + rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) + cov = cova2(xdb[0],ydb[0],xdb[0],ydb[0],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) +# Keep this value to use for the unbiasedness constraint: + unbias = cov + first = False + if ndb <= 1: + cbb = cov + else: + for i in range(0, ndb): + for j in range(0, ndb): + cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + if i == j: + cov = cov - c0 + cbb = cbb + cov + # print(real) + # cbb = cbb/real(ndb*ndb) # TODO: undefined? + +# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: + nk = 0 + ak = 0.0 + vk = 0.0 + + for iy in range(0,ny): + yloc = ymn + (iy-0)*ysiz + for ix in range(0,nx): + xloc = xmn + (ix-0)*xsiz + current_node = (yloc, xloc) + +# Find the nearest samples within each octant: First initialize +# the counter arrays: + na = -1 # accounting for 0 as first index +# dist.fill(1.0e+20) +# nums.fill(-1) + # dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search + dist, nums = tree[current_node] + # remove any data outside search radius + na = len(dist) + nums = nums[dist UNEST: + nk = nk + 1 + ak = ak + est + vk = vk + est*est -# Set the constants - MAXCTX = mxctx - MAXCTY = mxcty - MAXCXY = MAXCTX * MAXCTY - MAXX = nx - MAXY = ny - MAXZ = 1 # assuming 2D for now - MXY = MAXX * MAXY - if MXY < 100: MXY = 100 - MAXNOD = nodmax - MAXSAM = ndmax - MAXKR1 = MAXNOD + MAXSAM + 1 -# print('MAXKR1'); print(MAXKR1) - MAXKR2 = MAXKR1 * MAXKR1 - MAXSBX = 1 - if nx > 1: - MAXSBX = int(nx/2) - if MAXSBX > 50: MAXSBX=50 - MAXSBY = 1 - if ny > 1: - MAXSBY = int(ny/2) - if MAXSBY > 50: MAXSBY=50 + if nk >= 1: + ak = ak / float(nk) + vk = vk/float(nk) - ak*ak +# print(' Estimated ' + str(nk) + ' blocks ') +# print(' average ' + str(ak) + ' variance ' + str(vk)) - MAXSBZ = 1 - MAXSB = MAXSBX*MAXSBY*MAXSBZ - -# Declare arrays + return kmap, vmap - dist = np.zeros(ndmax) - nums = np.zeros(ndmax,dtype = int) - -# Perform some quick checks - if nx > MAXX or ny> MAXY: - print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') - print(' you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.') - return sim - if ltail != 1 and ltail != 2: - print('ERROR invalid lower tail option ' + str(ltail)) - print(' only allow 1 or 2 - see GSLIB manual ') - return sim - if utail != 1 and utail != 2 and utail != 4: - print('ERROR invalid upper tail option ' + str(ltail)) - print(' only allow 1,2 or 4 - see GSLIB manual ') - return sim - if utail == 4 and utpar < 1.0: - print('ERROR invalid power for hyperbolic tail' + str(utpar)) - print(' must be greater 
than 1.0!') - return sim - if ltail == 2 and ltpar < 0.0: - print('ERROR invalid power for power model' + str(ltpar)) - print(' must be greater than 0.0!') - return sim - if utail == 2 and utpar < 0.0: - print('ERROR invalid power for power model' + str(utpar)) - print(' must be greater than 0.0!') - return sim - -# Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax - nd = len(df_extract) - ndmax = min(ndmax,nd) - x = df_extract[xcol].values - y = df_extract[ycol].values - vr = df_extract[vcol].values - vr_orig = np.copy(vr) -# print('size of data extract'); print(len(vr)) - wt = []; wt = np.array(wt) - if wcol > -1: - wt = df_extract[wcol].values - else: - wt = np.ones(nd) - sec = []; sec = np.array(sec) - if scol > -1: - sec = df_extract[scol].values - if itrans == 1: - if ismooth == 1: - dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] - ntr = len(dftrans_extract) - vrtr = dftrans_extrac[tcol].values - if twtcol > -1: - vrgtr = dftrans_extrac[tcol].values - else: - vrgtr = np.ones(ntr) - else: - vrtr = df_extract[vcol].values - ntr = len(df_extract) - vrgtr = np.copy(wt) - twt = np.sum(vrgtr) -# sort - vrtr,vrgtr = dsortem(0,ntr,vrtr,2,b=vrgtr) - -# Compute the cumulative probabilities and write transformation table - twt = max(twt,EPSLON) - oldcp = 0.0 - cp = 0.0 -# print('ntr'); print(ntr) - for j in range(0,ntr): - cp = cp + vrgtr[j]/twt - w = (cp + oldcp)*0.5 - vrg = gauinv(w) - oldcp = cp -# Now, reset the weight to the normal scores value: - vrgtr[j] = vrg - - twt = np.sum(wt) -# Normal scores transform the data - for id in range(0,nd): - if itrans == 1: - vrr = vr[id] - j = dlocate(vrtr,1,nd,vrr) - j = min(max(0,j),(nd-2)) - vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) - if vrg < vrgtr[0]: vrg = vrgtr[0] - if(vrg > vrgtr[nd-1]): vrg = vrgtr[nd-1] - vr[id] = vrg - - weighted_stats_orig = DescrStatsW(vr_orig,weights=wt) - orig_av = weighted_stats_orig.mean - orig_ss = weighted_stats_orig.var - - weighted_stats = DescrStatsW(vr,weights=wt) - av = weighted_stats.mean - ss = weighted_stats.var - - print('\n Data for SGSIM: Number of acceptable data = ' + str(nd)) - print(' Number trimmed = ' + str(len(df)- nd)) - print(' Weighted Average = ' + str(round(orig_av,4))) - print(' Weighted Variance = ' + str(round(orig_ss,4))) - print(' Weighted Transformed Average = ' + str(round(av,4))) - print(' Weighted Transformed Variance = ' + str(round(ss,4))) - -# Read in secondary data - sim = np.random.rand(nx*ny) - index = 0 - for ixy in range(0,nxy): - sim[index] = index - - lvm = []; lvm = np.array(lvm) - if ktype >= 2: - #lvm = np.copy(sec_map.flatten()) - ind = 0 - lvm = np.zeros(nxy) - for iy in range(0,ny): - for ix in range(0,nx): - lvm[ind] = sec_map[ny-iy-1,ix] - ind = ind + 1 - if ktype == 2 and itrans == 1: - for ixy in range(0,nxy): -# Do we to transform the secondary variable for a local mean? - vrr = lvm[ixy] - j = dlocate(vrtr,1,ntr,vrr) - j = min(max(0,j),(ntr-2)) - vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) - if vrg < vrgtr[0]: vrg = vrgtr[0] - if(vrg > vrgtr[ntr-1]): vrg = vrgtr[nd-1] - lvm[ixy] = vrg - av = np.average(lvm) - ss = np.var(lvm) - print(' Secondary Data: Number of data = ' + str(nx*ny)) - print(' Equal Weighted Average = ' + str(round(av,4))) - print(' Equal Weighted Variance = ' + str(round(ss,4))) -# Do we need to work with data residuals? 
(Locally Varying Mean) - if ktype == 2: - sec = np.zeros(nd) - for idd in range(0,nd): - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) - index = ix + (iy-1)*nx - sec[idd] = lvm[index] -# Calculation of residual moved to krige subroutine: vr(i)=vr(i)-sec(i) - - -# Do we need to get an external drift attribute for the data? - if ktype == 3: - for idd in range(0,nd): - if sec[i] != UNEST: - ix = getindx(nx,xmn,xsiz,x[idd]) - iy = getindx(ny,ymn,ysiz,y[idd]) - ind = ix + (iy)*nx - sec[ind] = lvm[ind] - -# Transform the secondary attribute to normal scores? - if ktype == 4: - order_sec = np.zeros(nxy) - ind = 0 - for ixy in range(0,nxy): - order_sec[ixy] = ind - ind = ind + 1 - print(' Transforming Secondary Data with') - print(' variance reduction of ' + str(varred)) - lvm,order_sec = dsortem(0,nxy,lvm,2,b=order_sec) - oldcp = 0.0 - cp = 0.0 - for i in range(0,nxy): - cp = cp + (1.0/(nxy)) - w = (cp + oldcp)/2.0 - lvm[i] = gauinv(w) - lvm[i] = lvm[i] * varred - oldcp = cp - order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm) -# return np.reshape(lvm,(ny,nx)) # check the transform - -# Set up the rotation/anisotropy matrices that are needed for the -# variogram and search. - print('Setting up rotation matrices for variogram and search') - if nst == 1: - rotmat = setrot(ang[0],ang[0],sang1,anis[0],anis[0],sanis1,nst,MAXROT=2) - else: - rotmat = setrot(ang[0],ang[1],sang1,anis[0],anis[1],sanis1,nst,MAXROT=2) - isrot = 2 # search rotation is appended as 3rd +def kb2d_jit( + df, + xcol, + ycol, + vcol, + tmin, + tmax, + nx, + xmn, + xsiz, + ny, + ymn, + ysiz, + nxdis, + nydis, + ndmin, + ndmax, + radius, + ktype, + skmean, + vario, +): + """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the + original Fortran to Python by Michael Pyrcz, the University of Texas at + Austin (Jan, 2019). 
+ :param df: pandas DataFrame with the spatial data + :param xcol: name of the x coordinate column + :param ycol: name of the y coordinate column + :param vcol: name of the property column + :param tmin: property trimming limit + :param tmax: property trimming limit + :param nx: definition of the grid system (x axis) + :param xmn: definition of the grid system (x axis) + :param xsiz: definition of the grid system (x axis) + :param ny: definition of the grid system (y axis) + :param ymn: definition of the grid system (y axis) + :param ysiz: definition of the grid system (y axis) + :param nxdis: number of discretization points for a block + :param nydis: number of discretization points for a block + :param ndmin: minimum number of data points to use for kriging a block + :param ndmax: maximum number of data points to use for kriging a block + :param radius: maximum isotropic search radius + :param ktype: + :param skmean: + :param vario: + :return: + """ +# Load the data + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + nd = len(df_extract) + x = df_extract[xcol].values + y = df_extract[ycol].values + vr = df_extract[vcol].values - rotmat_2d, maxcov = setup_rotmat2(c0,nst,it,cc,ang) # will use one in the future -# print('MaxCov = ' + str(maxcov)) - -# Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) +# Make a KDTree for fast search of nearest neighbours + # dp = list((y[i], x[i]) for i in range(0,nd)) data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - -# Set up the covariance table and the spiral search: - cov_table,tmp,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,rotmat,radsqd) - -# print('Covariance Table'); print(cov_table) -# MAIN LOOP OVER ALL THE SIMULAUTIONS: - for isim in range(0,nsim): - -# Work out a random path for this realization: - sim = np.random.rand(nx*ny) - order = np.zeros(nxy) - ind = 0 - for ixy in range(0,nxy): - order[ixy] = ind - ind = ind + 1 - -# The multiple grid search works with multiples of 4 (yes, that is -# somewhat arbitrary): - - if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) -# print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 - index = jx + (jy-1)*nx - sim[index] = sim[index] - (imult+1) - - -# Initialize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) - sim.fill(UNEST) - print('Working on realization number ' + str(isim)) - -# Assign the data to the closest grid node: - - TINY = 0.0001 - for idd in range(0,nd): -# print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) - ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz -# print('xx, yy' + str(xx) + ',' + str(yy)) - test = abs(xx-x[idd]) + abs(yy-y[idd]) - -# Assign this data to the node (unless there is a closer data): - if sstrat == 1: - if sim[ind] > 0.0: - id2 = int(sim[ind]+0.5) - test2 = abs(xx-x(id2)) + abs(yy-y(id2)) - if test <= test2: - sim[ind] = idd - else: - sim[ind] = id2 - -# Assign a flag so that this node does not get simulated: - if sstrat == 0 and test <= TINY: sim[ind]=10.0*UNEST - -# Now, enter data values into the simulated grid: - for 
ind in range(0,nxy): - idd = int(sim[ind]+0.5) - if idd > 0: sim[ind] = vr[id] - irepo = max(1,min((nxy/10),10000)) - -# MAIN LOOP OVER ALL THE NODES: - for ind in range(0,nxy): - if (int(ind/irepo)*irepo) == ind: - print(' currently on node ' + str(ind)) - -# Figure out the location of this point and make sure it has -# not been assigned a value already: - - index = int(order[ind]+0.5) - if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) - ix = index - (iy)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz - current_node = (yy,xx) -# print('Current_node'); print(current_node) - -# Now, we'll simulate the point ix,iy,iz. First, get the close data -# and make sure that there are enough to actually simulate a value, -# we'll only keep the closest "ndmax" data, and look for previously -# simulated grid nodes: - - if sstrat == 0: -# print('searching for nearest data') - na = -1 # accounting for 0 as first index - if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search - else: - dist, nums = tree.query(current_node,ndmax) - # remove any data outside search radius - -# print('nums'); print(nums) -# print('dist'); print(dist) - na = len(dist) - nums = nums[dist -9.0 and simval < 9.0: - ne = ne + 1 - av = av + simval - ss = ss + simval*simval - if itrans == 1 and simval > (UNEST+EPSLON): - simval = backtr_value(simval,vrtr,vrgtr,zmin,zmax,ltail,ltpar,utail,utpar) - if simval < zmin: simval = zmin - if simval > zmax: simval = zmax - sim[ind] = simval -# print('simulated value = ' + str(sim[ind]) + ' at location index = ' + str(ind)) - av = av / max(ne,1.0) - ss =(ss / max(ne,1.0)) - av * av - print('\n Realization ' + str(isim) + ': number = ' + str(ne)) - print(' mean = ' + str(round(av,4)) + ' (close to 0.0?)') - print(' variance = ' + str(round(ss,4)) + ' (close to gammabar(V,V)? approx. 1.0)') + vario_float['nug'] = vario.pop('nug') + vario_float['cc1'] = vario.pop('cc1') + vario_float['cc2'] = vario.pop('cc2') -# END MAIN LOOP OVER SIMULATIONS: - sim_out = np.zeros((ny,nx)) - for ind in range(0,nxy): - iy = int((ind)/nx) - ix = ind - (iy)*nx - sim_out[ny-iy-1,ix] = sim[ind] - return sim_out + for key in vario.keys(): + vario_int[key] = vario[key] -def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin, - ndmax,nodmax,mults,nmult,noct,radius,ktype,vario): - - """A 2D version of GSLIB's SISIM Indicator Simulation program (Deutsch and Journel, 1998) converted from the + tree = Dict.empty( + key_type = nb.types.Tuple((nb.f8, nb.f8)), + value_type = nb.types.Tuple((nb.f8[:], nb.i4[:])) + ) + + for iy in range(0,ny): + yloc = ymn + (iy-0)*ysiz + for ix in range(0,nx): + xloc = xmn + (ix-0)*xsiz + current_node = (yloc,xloc) + tree[current_node] = _tree.query(current_node, ndmax, distance_upper_bound=radius) + + kmap, vmap = kb2d_jit_main( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, radius, ktype, skmean, vario_int, vario_float) + + return kmap, vmap + + +def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xmn,xsiz,ny,ymn,ysiz,ndmin,ndmax,radius,ktype,vario): + + """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (March, 2019). 
WARNING: only tested for cateogrical ktype 0, 1 and 2 (locally variable proportion). + Austin (March, 2019). :param df: pandas DataFrame with the spatial data :param xcol: name of the x coordinate column :param ycol: name of the y coordinate column @@ -3254,1472 +2918,1444 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin :param vario: list with all of the indicator variograms (sill of 1.0) in consistent order with above parameters :return: """ -# Checks - if utail == 3 or ltail == 3 or middle == 3: - print('ERROR - distribution extrapolation option 3 with table is not available') - return sim_out - if xcol == "" or ycol == "": - print('ERROR - must have x and y column in the DataFrame') - return sim_out -# Set parameters from the include - UNEST = -99.0 - EPSLON = 1.0e-20 - VERSION = 0.001 - np.random.seed(seed) - colocorr = 0.0 # no collocated cokriging - lvm = 0 # no kriging with a locally variable mean - sec = []; sec = np.array(sec) # no secondary data - ng = 0 # no tabulated values -# Find the needed paramters: - PMX = 9999.9 - MAXSAM = ndmax + 1 - MAXEQ = MAXSAM + 1 - nxy = nx*ny - mik = 0 # full indicator kriging - use_trend = False - trend1d = np.zeros((nxy,1)) # no trend make a dummy trend - if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: - trend1d = np.zeros((nxy,ncut)) - use_trend = True - index = 0 - for iy in range(0,ny): - for ix in range(0,nx): - for ic in range(0,ncut): - trend1d[index,ic] = trend[ny-iy-1,ix,ic] # copy trend - index = index + 1 - - MAXORD = nxy - MAXNOD = nodmax - - cnodeiv = np.zeros((ncut+1,MAXNOD)) - - tmp = np.zeros(MAXORD) +# Find the needed paramters: + PMX = 9999.9 + MAXSAM = ndmax + 1 + MAXEQ = MAXSAM + 1 + mik = 0 # full indicator kriging + use_trend = False + if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: use_trend = True + +# load the variogram + MAXNST = 2 + nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) + aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) + ang = np.zeros((MAXNST,ncut)); anis = np.zeros((MAXNST,ncut)) + + for icut in range(0,ncut): + nst[icut] = int(vario[icut]['nst']) + c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; + ang[0,icut] = vario[icut]['azi1']; + aa[0,icut] = vario[icut]['hmaj1']; anis[0,icut] = vario[icut]['hmin1']/vario[icut]['hmaj1']; + if nst[icut] == 2: + cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; + aa[1,icut] = vario[icut]['hmaj2']; anis[1,icut] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + +# Load the data + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + MAXDAT = len(df_extract) + MAXCUT = ncut + MAXNST = 2 + MAXROT = MAXNST*MAXCUT+ 1 + ikout = np.zeros((nx,ny,ncut)) + maxcov = np.zeros(ncut) + + # Allocate the needed memory: + xa = np.zeros(MAXSAM) + ya = np.zeros(MAXSAM) + vra = np.zeros(MAXSAM) + dist = np.zeros(MAXSAM) + nums = np.zeros(MAXSAM) + r = np.zeros(MAXEQ) + rr = np.zeros(MAXEQ) + s = np.zeros(MAXEQ) + a = np.zeros(MAXEQ*MAXEQ) + ikmap = np.zeros((nx,ny,ncut)) + vr = np.zeros((MAXDAT,MAXCUT+1)) + + nviol = np.zeros(MAXCUT) + aviol = np.zeros(MAXCUT) + xviol = np.zeros(MAXCUT) + + ccdf = np.zeros(ncut) + ccdfo = np.zeros(ncut) + ikout = np.zeros((nx,ny,ncut)) + + x = df_extract[xcol].values + y = df_extract[ycol].values + v = df_extract[vcol].values + +# The indicator data are 
constructed knowing the
+# data value.
+
+    if ivtype == 0:
+        for icut in range(0,ncut):
+            vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0')
+    else:
+        for icut in range(0,ncut):
+            vr[:,icut] = np.where(v <= thresh[icut], '1', '0')
+    vr[:,ncut] = v
+
+# Make a KDTree for fast search of nearest neighbours
+    dp = list((y[i], x[i]) for i in range(0,MAXDAT))
+    data_locs = np.column_stack((y,x))
+    tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True)
+
+# Summary statistics of the input data
+
+    avg = vr[:,ncut].mean()
+    stdev = vr[:,ncut].std()
+    ss = stdev**2.0
+    vrmin = vr[:,ncut].min()
+    vrmax = vr[:,ncut].max()
+    print('Data for IK3D: Variable column ' + str(vcol))
+    print('  Number   = ' + str(MAXDAT))
+    ndh = MAXDAT
+
+    actloc = np.zeros(MAXDAT, dtype = int)
+    for i in range(1,MAXDAT):
+        actloc[i] = i
+
+# Set up the rotation/anisotropy matrices that are needed for the
+# variogram and search:
+
+    print('Setting up rotation matrices for variogram and search')
+    radsqd = radius * radius
+    rotmat = []
+    for ic in range(0,ncut):
+        rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[:,ic],cc[:,ic],ang[:,ic],9999.9)
+        rotmat.append(rotmat_temp)
+# Initialize accumulators:  # not setup yet
+    nk = 0
+    xk = 0.0
+    vk = 0.0
+    for icut in range (0,ncut):
+        nviol[icut] = 0
+        aviol[icut] = 0.0
+        xviol[icut] = -1.0
+    nxy = nx*ny
+    print('Working on the kriging')
+
+# Report on progress from time to time:
+    if koption == 0:
+        nxy = nx*ny
+        nloop = nxy
+        irepo = max(1,min((nxy/10),10000))
+    else:
+        nloop = 10000000
+        irepo = max(1,min((nd/10),10000))
+    ddh = 0.0
+
+# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID:
+    for index in range(0,nloop):
+
+        if (int(index/irepo)*irepo) == index: print('   currently on estimate ' + str(index))
+
+        if koption == 0:
+            iy = int((index)/nx)
+            ix = index - (iy)*nx
+            xloc = xmn + (ix)*xsiz
+            yloc = ymn + (iy)*ysiz
+        else:
+            ddh = 0.0
+            # TODO: pass the cross validation value
+
+# Find the nearest samples within each octant: First initialize the counter arrays:
+        na = -1   # accounting for 0 as first index
+        dist.fill(1.0e+20)
+        nums.fill(-1)
+        current_node = (yloc,xloc)
+        dist, close = tree.query(current_node,ndmax) # use kd tree for fast nearest data search
+        # remove any data outside search radius
+        close = close[dist<radius]
+        dist = dist[dist<radius]
+        nclose = len(dist)
+
+# Loop over all the thresholds/categories:
+        for ic in range(0,ncut):
+            krig = True
+            if mik == 1 and ic >= 1: krig = False
+
+# Identify the close data (there may be a different number of data at
+# each threshold because of constraint intervals); however, if
+# there are no constraint intervals then this step can be avoided.
+            nca = -1
+            for ia in range(0,nclose):
+                j = int(close[ia]+0.5)
+                ii = actloc[j]
+                accept = True
+                if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)) < EPSLON: accept = False
+                if accept:
+                    nca = nca + 1
+                    vra[nca] = vr[ii,ic]
+                    xa[nca] = x[j]
+                    ya[nca] = y[j]
+
+# If there are no samples at this threshold then use the global cdf:
+            if nca == -1:
+                if use_trend:
+                    ccdf[ic] = trend[ny-iy-1,ix,ic]
+                else:
+                    ccdf[ic] = gcdf[ic]
+            else:
+
+# Now, only load the variogram, build the matrix,... 
if kriging: + neq = nclose + ktype + na = nclose + +# Set up kriging matrices: + iin=-1 # accounting for first index of 0 + for j in range(0,na): +# Establish Left Hand Side Covariance Matrix: + for i in range(0,na): # was j - want full matrix + iin = iin + 1 + a[iin] = cova2(xa[i],ya[i],xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic]) + if ktype == 1: + iin = iin + 1 + a[iin] = maxcov[ic] + r[j] = cova2(xloc,yloc,xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic]) + +# Set the unbiasedness constraint: + if ktype == 1: + for i in range(0,na): + iin = iin + 1 + a[iin] = maxcov[ic] + iin = iin + 1 + a[iin] = 0.0 + r[neq-1] = maxcov[ic] + rr[neq-1] = r[neq] +# Solve the system: + if neq == 1: + ising = 0.0 + s[0] = r[0] / a[0] + else: + s = ksol_numpy(neq,a,r) + +# Finished kriging (if it was necessary): + +# Compute Kriged estimate of cumulative probability: + sumwts = 0.0 + ccdf[ic] = 0.0 + for i in range(0,nclose): + ccdf[ic] = ccdf[ic] + vra[i]*s[i] + sumwts = sumwts + s[i] + if ktype == 0: + if use_trend == True: + ccdf[ic] = ccdf[ic] + (1.0-sumwts)*trend[ny-iy-1,ix,ic] + else: + ccdf[ic] = ccdf[ic] + (1.0-sumwts)*gcdf[ic] + +# Keep looping until all the thresholds are estimated: + +# Correct and write the distribution to the output file: + nk = nk + 1 + ccdfo = ordrel(ivtype,ncut,ccdf) + +# Write the IK CCDF for this grid node: + if koption == 0: + ikout[ny-iy-1,ix,:] = ccdfo + else: + print('TBD') + return ikout + +def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtcol,zmin,zmax,ltail,ltpar,utail,utpar,nsim, + nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,ndmax,nodmax,mults,nmult,noct,radius,radius1,sang1, + mxctx,mxcty,ktype,colocorr,sec_map,vario): + +# Parameters from sgsim.inc + MAXNST=2; MAXROT=2; UNEST=-99.0; EPSLON=1.0e-20; VERSION=2.907 + KORDEI=12; MAXOP1=KORDEI+1; MAXINT=2**30 + +# Set other parameters + np.random.seed(seed) + nxy = nx*ny sstrat = 0 # search data and nodes by default, turned off if unconditional - sang1 = 0 # using isotropic search now - sanis1 = 1.0 -# No covariance lookup table - mxctx = int(radius/xsiz)*2+1; mxcty = int(radius/xsiz)*2+1 -# print('cov table / spiral search nx, ny '); print(mxctx); print(mxcty) + radsqd = radius * radius + sanis1 = radius1/radius + if ktype == 4: varred = 1.0 + +# load the variogram + nst = int(vario['nst']) + cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst,dtype=int) + ang = np.zeros(nst); anis = np.zeros(nst) + + c0 = vario['nug']; + cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; + aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + if nst == 2: + cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; + aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; + +# Set the constants MAXCTX = mxctx MAXCTY = mxcty MAXCXY = MAXCTX * MAXCTY - -# Grid extents - MAXX = nx - MAXY = ny - MXY = MAXX * MAXY - -# Kriging system - MAXKR1 = 2 * MAXNOD + 2 * MAXSAM + 1 + MAXX = nx + MAXY = ny + MAXZ = 1 # assuming 2D for now + MXY = MAXX * MAXY + if MXY < 100: MXY = 100 + MAXNOD = nodmax + MAXSAM = ndmax + MAXKR1 = MAXNOD + MAXSAM + 1 +# print('MAXKR1'); print(MAXKR1) MAXKR2 = MAXKR1 * MAXKR1 MAXSBX = 1 - if nx > 1: + if nx > 1: MAXSBX = int(nx/2) - if MAXSBX > 50: - MAXSBX=50 + if MAXSBX > 50: MAXSBX=50 MAXSBY = 1 if ny > 1: MAXSBY = int(ny/2) - if MAXSBY > 50: - MAXSBY=50 -# print('ncut'); print(ncut) - -# load the variogram - MAXNST = 2 - nst = 
np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST)) - aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int) - ang = np.zeros((ncut,MAXNST)); anis = np.zeros((ncut,MAXNST)) + if MAXSBY > 50: MAXSBY=50 + + MAXSBZ = 1 + MAXSB = MAXSBX*MAXSBY*MAXSBZ + +# Declare arrays + + dist = np.zeros(ndmax) + nums = np.zeros(ndmax,dtype = int) + +# Perform some quick checks + if nx > MAXX or ny> MAXY: + print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') + print(' you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.') + return sim + if ltail != 1 and ltail != 2: + print('ERROR invalid lower tail option ' + str(ltail)) + print(' only allow 1 or 2 - see GSLIB manual ') + return sim + if utail != 1 and utail != 2 and utail != 4: + print('ERROR invalid upper tail option ' + str(ltail)) + print(' only allow 1,2 or 4 - see GSLIB manual ') + return sim + if utail == 4 and utpar < 1.0: + print('ERROR invalid power for hyperbolic tail' + str(utpar)) + print(' must be greater than 1.0!') + return sim + if ltail == 2 and ltpar < 0.0: + print('ERROR invalid power for power model' + str(ltpar)) + print(' must be greater than 0.0!') + return sim + if utail == 2 and utpar < 0.0: + print('ERROR invalid power for power model' + str(utpar)) + print(' must be greater than 0.0!') + return sim -# print('varios - 1 vario'); print(vario[1]) - - for icut in range(0,ncut): -# print('icut'); print(icut) - nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1']; - ang[icut,0] = vario[icut]['azi1']; - aa[icut,0] = vario[icut]['hmaj1']; anis[icut,0] = vario[icut]['hmin1']/vario[icut]['hmaj1']; - if nst[icut] == 2: - cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2']; - aa[icut,1] = vario[icut]['hmaj2']; anis[icut,1] = vario[icut]['hmin2']/vario[icut]['hmaj2']; - -# print('check loaded cov model- icut '); print(icut) -# print(cc[icut],aa[icut],it[icut],ang[icut],anis[icut]) - - - # Load the data df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax - MAXDAT = len(df_extract) - nd = MAXDAT - MAXCUT = ncut - MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) - maxcov = np.zeros(ncut) - - # Allocate the needed memory: - xa = np.zeros(MAXSAM) - ya = np.zeros(MAXSAM) - vra = np.zeros(MAXSAM) - dist = np.zeros(MAXSAM) - nums = np.zeros(MAXSAM) - r = np.zeros(MAXEQ) - rr = np.zeros(MAXEQ) - s = np.zeros(MAXEQ) - a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) - - nviol = np.zeros(MAXCUT) - aviol = np.zeros(MAXCUT) - xviol = np.zeros(MAXCUT) - - ccdf = np.zeros(ncut) - ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) - + nd = len(df_extract) + ndmax = min(ndmax,nd) x = df_extract[xcol].values y = df_extract[ycol].values - v = df_extract[vcol].values - - MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used - gcut = np.zeros(MAXTAB) - -# The indicator data are constructed knowing the thresholds and the -# data value. 
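[Editor's sketch] The indicator coding referred to here works the same way for the data as for the previously simulated nodes: each value becomes one 0/1 column per threshold, a category match for ivtype 0 (labels are matched within +/- 0.5) and a cumulative "value at or below threshold" flag for the continuous case, with the raw value kept in the last column. A small standalone sketch with made-up values (using floats where the source assigns the strings '1'/'0', which numpy coerces on assignment):

    import numpy as np

    # Made-up continuous data and thresholds.
    v = np.array([0.3, 1.7, 2.2])
    thresh = np.array([0.5, 1.5, 2.5])
    ncut = len(thresh)
    ivtype = 1  # 0 = categorical, 1 = continuous

    vr = np.zeros((len(v), ncut + 1))
    for icut in range(ncut):
        if ivtype == 0:
            # categorical: 1 where the value matches the integer class label
            vr[:, icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), 1.0, 0.0)
        else:
            # continuous: cumulative indicator, 1 at or below the threshold
            vr[:, icut] = np.where(v <= thresh[icut], 1.0, 0.0)
    vr[:, ncut] = v  # keep the raw value in the last column
    # the continuous case here gives rows [1,1,1], [0,0,1], [0,0,1]

Kriging these columns estimates P(Z <= thresh[icut]) directly, which is why the estimates are later passed through ordrel to restore a valid, monotonic ccdf.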
- -# print('ncut'); print(ncut) - if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + vr = df_extract[vcol].values + vr_orig = np.copy(vr) +# print('size of data extract'); print(len(vr)) + wt = []; wt = np.array(wt) + if wcol > -1: + wt = df_extract[wcol].values else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v + wt = np.ones(nd) + sec = []; sec = np.array(sec) + if scol > -1: + sec = df_extract[scol].values + if itrans == 1: + if ismooth == 1: + dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] + ntr = len(dftrans_extract) + vrtr = dftrans_extrac[tcol].values + if twtcol > -1: + vrgtr = dftrans_extrac[tcol].values + else: + vrgtr = np.ones(ntr) + else: + vrtr = df_extract[vcol].values + ntr = len(df_extract) + vrgtr = np.copy(wt) + twt = np.sum(vrgtr) +# sort + vrtr,vrgtr = dsortem(0,ntr,vrtr,2,b=vrgtr) + +# Compute the cumulative probabilities and write transformation table + twt = max(twt,EPSLON) + oldcp = 0.0 + cp = 0.0 +# print('ntr'); print(ntr) + for j in range(0,ntr): + cp = cp + vrgtr[j]/twt + w = (cp + oldcp)*0.5 + vrg = gauinv(w) + oldcp = cp +# Now, reset the weight to the normal scores value: + vrgtr[j] = vrg + + twt = np.sum(wt) +# Normal scores transform the data + for id in range(0,nd): + if itrans == 1: + vrr = vr[id] + j = dlocate(vrtr,1,nd,vrr) + j = min(max(0,j),(nd-2)) + vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) + if vrg < vrgtr[0]: vrg = vrgtr[0] + if(vrg > vrgtr[nd-1]): vrg = vrgtr[nd-1] + vr[id] = vrg + + weighted_stats_orig = DescrStatsW(vr_orig,weights=wt) + orig_av = weighted_stats_orig.mean + orig_ss = weighted_stats_orig.var + + weighted_stats = DescrStatsW(vr,weights=wt) + av = weighted_stats.mean + ss = weighted_stats.var + + print('\n Data for SGSIM: Number of acceptable data = ' + str(nd)) + print(' Number trimmed = ' + str(len(df)- nd)) + print(' Weighted Average = ' + str(round(orig_av,4))) + print(' Weighted Variance = ' + str(round(orig_ss,4))) + print(' Weighted Transformed Average = ' + str(round(av,4))) + print(' Weighted Transformed Variance = ' + str(round(ss,4))) + +# Read in secondary data + sim = np.random.rand(nx*ny) + index = 0 + for ixy in range(0,nxy): + sim[index] = index + + lvm = []; lvm = np.array(lvm) + if ktype >= 2: + #lvm = np.copy(sec_map.flatten()) + ind = 0 + lvm = np.zeros(nxy) + for iy in range(0,ny): + for ix in range(0,nx): + lvm[ind] = sec_map[ny-iy-1,ix] + ind = ind + 1 + if ktype == 2 and itrans == 1: + for ixy in range(0,nxy): +# Do we to transform the secondary variable for a local mean? + vrr = lvm[ixy] + j = dlocate(vrtr,1,ntr,vrr) + j = min(max(0,j),(ntr-2)) + vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) + if vrg < vrgtr[0]: vrg = vrgtr[0] + if(vrg > vrgtr[ntr-1]): vrg = vrgtr[nd-1] + lvm[ixy] = vrg + av = np.average(lvm) + ss = np.var(lvm) + print(' Secondary Data: Number of data = ' + str(nx*ny)) + print(' Equal Weighted Average = ' + str(round(av,4))) + print(' Equal Weighted Variance = ' + str(round(ss,4))) + +# Do we need to work with data residuals? (Locally Varying Mean) + if ktype == 2: + sec = np.zeros(nd) + for idd in range(0,nd): + ix = getindex(nx,xmn,xsiz,x[idd]) + iy = getindex(ny,ymn,ysiz,y[idd]) + index = ix + (iy-1)*nx + sec[idd] = lvm[index] +# Calculation of residual moved to krige subroutine: vr(i)=vr(i)-sec(i) + + +# Do we need to get an external drift attribute for the data? 
+ if ktype == 3: + for idd in range(0,nd): + if sec[i] != UNEST: + ix = getindx(nx,xmn,xsiz,x[idd]) + iy = getindx(ny,ymn,ysiz,y[idd]) + ind = ix + (iy)*nx + sec[ind] = lvm[ind] + +# Transform the secondary attribute to normal scores? + if ktype == 4: + order_sec = np.zeros(nxy) + ind = 0 + for ixy in range(0,nxy): + order_sec[ixy] = ind + ind = ind + 1 + print(' Transforming Secondary Data with') + print(' variance reduction of ' + str(varred)) + lvm,order_sec = dsortem(0,nxy,lvm,2,b=order_sec) + oldcp = 0.0 + cp = 0.0 + for i in range(0,nxy): + cp = cp + (1.0/(nxy)) + w = (cp + oldcp)/2.0 + lvm[i] = gauinv(w) + lvm[i] = lvm[i] * varred + oldcp = cp + order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm) +# return np.reshape(lvm,(ny,nx)) # check the transform -# print('loaded data '); print(vr) -# Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) - -# Summary statistics of the input data - - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() - ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() - print('Data for IK3D: Variable column ' + str(vcol)) - print(' Number = ' + str(MAXDAT)) - ndh = MAXDAT - - actloc = np.zeros(MAXDAT, dtype = int) # need to set up data at node locations - for i in range(1,MAXDAT): - actloc[i] = i - # Set up the rotation/anisotropy matrices that are needed for the -# variogram and search: - +# variogram and search. print('Setting up rotation matrices for variogram and search') - radsqd = radius * radius - rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[ic],cc[ic],ang[ic],9999.9) - rotmat.append(rotmat_temp) - - #return rotmat -# Set up the covariance table and the spiral search based just on the first variogram -# This is ok as we are not using the covariance look up table, just spiral search for previous nodes - isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here - -# print('ang[0]'); print(ang[0]) - - if nst[0] == 1: - global_rotmat = setrot(ang[0,0],ang[0,0],sang1,anis[0,0],anis[0,0],sanis1,nst[0],MAXROT=2) + if nst == 1: + rotmat = setrot(ang[0],ang[0],sang1,anis[0],anis[0],sanis1,nst,MAXROT=2) else: - global_rotmat = setrot(ang[0,0],ang[1,0],sang1,anis[0,0],anis[1,0],sanis1,nst[0],MAXROT=2) - - cov_table,tmp2,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) - - -# print('spiral search number nodes '); print(nlooku) -# print('ixnode,iynode'); print(ixnode,iynode) -# Initialize accumulators: # not setup yet - nk = 0 - xk = 0.0 - vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 - xviol[icut] = -1.0 + rotmat = setrot(ang[0],ang[1],sang1,anis[0],anis[1],sanis1,nst,MAXROT=2) + isrot = 2 # search rotation is appended as 3rd -# print('Working on the kriging') + rotmat_2d, maxcov = setup_rotmat2(c0,nst,it,cc,ang) # will use one in the future +# print('MaxCov = ' + str(maxcov)) -# Report on progress from time to time: - if koption == 0: - nxy = nx*ny - nloop = nxy - irepo = max(1,min((nxy/10),10000)) - else: - nloop = 10000000 - irepo = max(1,min((nd/10),10000)) - ddh = 0.0 - +# Make a KDTree for fast search of nearest neighbours + dp = list((y[i], x[i]) for i in range(0,nd)) + data_locs = np.column_stack((y,x)) + tree = sp.cKDTree(data_locs, 
leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + +# Set up the covariance table and the spiral search: + cov_table,tmp,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, + xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,rotmat,radsqd) + +# print('Covariance Table'); print(cov_table) # MAIN LOOP OVER ALL THE SIMULAUTIONS: -# for isim in range(0,nsim): # will add multiple realizations soon - + for isim in range(0,nsim): + # Work out a random path for this realization: - sim = np.random.rand(nx*ny) - order = np.zeros(nxy) - ind = 0 - for ixy in range(0,nxy): - order[ixy] = ind - ind = ind + 1 - -# Multiple grid search works with multiples of 4 (yes, that is -# soat arbitrary): + sim = np.random.rand(nx*ny) + order = np.zeros(nxy) + ind = 0 + for ixy in range(0,nxy): + order[ixy] = ind + ind = ind + 1 - if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) -# print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 - index = jx + (jy-1)*nx - sim[index] = sim[index] - (imult+1) +# The multiple grid search works with multiples of 4 (yes, that is +# somewhat arbitrary): - -# Inlize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) - sim.fill(UNEST) - tmp.fill(0.0) - print('Working on a single realization, seed ' + str(seed)) -# print('Random Path'); print(order) - -# As the data to the closest grid node: + if mults == 1: + for imult in range(0,nmult): + nny = int(max(1,ny/((imult+1)*4))) + nnx = int(max(1,nx/((imult+1)*4))) +# print('multi grid - nnx, nny'); print(nnx,nny) + jy = 1 + jx = 1 + for iy in range(0,nny): + if nny > 0: jy = iy*(imult+1)*4 + for ix in range(0,nnx): + if nnx > 0: jx = ix*(imult+1)*4 + index = jx + (jy-1)*nx + sim[index] = sim[index] - (imult+1) - TINY = 0.0001 - for idd in range(0,nd): -# print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) - ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz -# print('xx, yy' + str(xx) + ',' + str(yy)) - test = abs(xx-x[idd]) + abs(yy-y[idd]) -# As this data to the node (unless there is a closer data): - if sstrat == 1 or (sstrat == 0 and test <= TINY): - if sim[ind] > UNEST: - id2 = int(sim[ind]+0.5) - test2 = abs(xx-x[id2]) + abs(yy-y[id2]) - if test <= test2: - sim[ind] = idd - else: - sim[ind] = idd +# Initialize the simulation: + sim, order = dsortem(0,nxy,sim,2,b=order) + sim.fill(UNEST) + print('Working on realization number ' + str(isim)) -# As a flag so that this node does not get simulated: - +# Assign the data to the closest grid node: -# Another data values into the simulated grid: - for ind in range(0,nxy): - idd = int(sim[ind]+0.5) - if idd > 0: - sim[ind] = vr[idd] - else: - tmp[ind] = sim[ind] - sim[ind] = UNEST - irepo = max(1,min((nxy/10),10000)) + TINY = 0.0001 + for idd in range(0,nd): +# print('data'); print(x[idd],y[idd]) + ix = getindex(nx,xmn,xsiz,x[idd]) + iy = getindex(ny,ymn,ysiz,y[idd]) + ind = ix + (iy-1)*nx + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz +# print('xx, yy' + str(xx) + ',' + str(yy)) + test = abs(xx-x[idd]) + abs(yy-y[idd]) -# LOOP OVER ALL THE NODES: - for ind in range(0,nxy): - if (int(ind/irepo)*irepo) == ind: - print(' currently on node ' + str(ind)) - -# Find the index on the random path, check if assigned data and get location +# Assign this data to the node (unless 
there is a closer data):
+            if sstrat == 1:
+                if sim[ind] > 0.0:
+                    id2 = int(sim[ind]+0.5)
+                    test2 = abs(xx-x[id2]) + abs(yy-y[id2])
+                    if test <= test2:
+                        sim[ind] = idd
+                    else:
+                        sim[ind] = id2
 
-        index = int(order[ind]+0.5)
-        if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue
-        iy = int((index)/nx)
-        ix = index - (iy)*nx
-        xx = xmn + (ix)*xsiz
-        yy = ymn + (iy)*ysiz
-        current_node = (yy,xx)
-# print('Current_node'); print(current_node)
+# Assign a flag so that this node does not get simulated:
+            if sstrat == 0 and test <= TINY: sim[ind]=10.0*UNEST
 
-# Now we'll simulate the point ix,iy,iz. First, get the close data
+# Now, enter data values into the simulated grid:
+        for ind in range(0,nxy):
+            idd = int(sim[ind]+0.5)
+            if idd > 0: sim[ind] = vr[idd]
+        irepo = max(1,min((nxy/10),10000))
+
+# MAIN LOOP OVER ALL THE NODES:
+        for ind in range(0,nxy):
+            if (int(ind/irepo)*irepo) == ind:
+                print('   currently on node ' + str(ind))
+
+# Figure out the location of this point and make sure it has
+# not been assigned a value already:
+
+            index = int(order[ind]+0.5)
+            if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue
+            iy = int((index)/nx)
+            ix = index - (iy)*nx
+            xx = xmn + (ix)*xsiz
+            yy = ymn + (iy)*ysiz
+            current_node = (yy,xx)
+# print('Current_node'); print(current_node)
+
+# Now, we'll simulate the point ix,iy,iz. First, get the close data
 # and make sure that there are enough to actually simulate a value,
 # we'll only keep the closest "ndmax" data, and look for previously
 # simulated grid nodes:
 
-    if sstrat == 0:
-# print('searching for nearest data')
-        na = -1 # accounting for 0 as first index
-        if ndmax == 1:
-            dist = np.zeros(1); nums = np.zeros(1)
-            dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search
+            if sstrat == 0:
+# print('searching for nearest data')
+                na = -1 # accounting for 0 as first index
+                if ndmax == 1:
+                    dist = np.zeros(1); nums = np.zeros(1)
+                    dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search
+                else:
+                    dist, nums = tree.query(current_node,ndmax)
+                # remove any data outside search radius
+
+# print('nums'); print(nums)
+# print('dist'); print(dist)
+                na = len(dist)
+                nums = nums[dist < radius]
+                dist = dist[dist < radius]
+                na = len(dist)
-            if ivtype == 0:
-                for icut in range(0,ncut):
-                    cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0')
-            else:
-                for icut in range(0,ncut):
-                    cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0')
-                cnodeiv[ncut,:] = cnodev
+            gmean = 0.0
 
-# print('indicator transformed nearest nodes'); print(cnodeiv)
-
-# print('srchnd'); print(ncnode,icnode,cnodev,cnodex,cnodey)
-# print('Result of srchnd, cnodex = '); print(cnodex)
-            nclose = na
-# print('*****srch node, nclose ' + str(nclose) + ', ncnode ' + str(ncnode))
-# print('near data'); print(nums)
-# print('near data distance'); print(dist)
-    # print('nums'); print(nums)
+            if nclose+ncnode < 1:
+                cmean = gmean
+                cstdev = 1.0
 
-# What cdf value are we looking for?
-    zval = UNEST
-    cdfval = np.random.rand()
+# Perform the kriging. Note that if there are fewer than four data
+# then simple kriging is preferred so that the variance of the
+# realization does not become artificially inflated:
 
-# Use the global distribution? 
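Editor's note: once the local mean and standard deviation are in hand, the sequential Gaussian step reduces to drawing from N(cmean, cstdev^2) by inverse CDF, exactly as the p / gauinv / cmean + cstdev lines in the code that follows. A hypothetical helper showing that step in isolation (scipy's norm.ppf stands in for the module's gauinv; the fallback values mirror the nclose+ncnode < 1 branch above):

    import numpy as np
    from scipy.stats import norm

    def draw_node_value(cmean, cstdev, rng):
        # Monte Carlo draw from the local conditional distribution
        p = rng.random()                       # uniform cdf value in (0, 1)
        return cmean + cstdev * norm.ppf(p)    # inverse-CDF draw

    rng = np.random.default_rng(73073)
    print(draw_node_value(0.0, 1.0, rng))      # unconditional draw from N(0, 1)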
-# check inputs - # print('nst'); print(nst) + else: + lktype = ktype + if ktype == 1 and (nclose+ncnode) < 4: lktype=0 + cmean, cstdev = krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,nums,cov_table,nctx,ncty,icnode,ixnode,iynode,cnodev,cnodex,cnodey, + nst,c0,9999.9,cc,aa,it,ang,anis,rotmat_2d,maxcov,MAXCTX,MAXCTY,MAXKR1, + MAXKR2) - - - if nclose + ncnode <= 0: -# print('nclose & ncnode'); print(nclose, ncnode) - zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) - else: -# print('kriging') -# Estimate the local distribution by indicator kriging: -# print('maxcov'); print(maxcov) - for ic in range(0,ncut): -# print('check kriging cov model- icut '); print(ic) -# print('node data values for kriging'); print(cnodev) -# print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) - #ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) - if ktype == 0: - gmean = gcdf[ic] - elif ktype == 2: - gmean = trend1d[index,ic] - else: - gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used -# print('gmean'); print(gmean) - ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, - icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], - rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) - # print('ccdf'); print(ccdf) - -# Correct order relations: - ccdfo = ordrel(ivtype,ncut,ccdf) -# Draw from the local distribution: - zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) - sim[index] = zval - # print('zval'); print(zval) - +# Draw a random number and assign a value to this node: + p = np.random.rand() + xp = gauinv(p) + sim[index] = xp * cstdev + cmean +# print('simulated value = ' + str(sim[index])) + +# Quick check for far out results: + if abs(cmean) > 5.0 or abs(cstdev) > 5.0 or abs(sim[index]) > 6.0: + print('WARNING: grid node location: ' + str(ix) + ',' + str(iy)) + print(' conditional mean and stdev: ' + str(cmean) + ',' + str(cstdev)) + print(' simulated value: ' + str(sim[index])) + +# Do we need to reassign the data to the grid nodes? + if sstrat == 0: + print('Reassigning data to nodes') + for iid in range(0,nd): + ix = getindex(nx,xmn,xsiz,x[iid]) + iy = getindex(ny,ymn,ysiz,y[iid]) + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz + ind = ix + (iy-1)*nx + test=abs(xx-x[iid])+abs(yy-y[iid]) + if test <= TINY: sim[ind] = vr[iid] + +# Back transform each value and write results: + ne = 0 + av = 0.0 + ss = 0.0 + for ind in range(0,nxy): + iy = int((index-1)/nx) + 1 + ix = index - (iy-1)*nx + simval = sim[ind] + if simval > -9.0 and simval < 9.0: + ne = ne + 1 + av = av + simval + ss = ss + simval*simval + if itrans == 1 and simval > (UNEST+EPSLON): + simval = backtr_value(simval,vrtr,vrgtr,zmin,zmax,ltail,ltpar,utail,utpar) + if simval < zmin: simval = zmin + if simval > zmax: simval = zmax + sim[ind] = simval +# print('simulated value = ' + str(sim[ind]) + ' at location index = ' + str(ind)) + av = av / max(ne,1.0) + ss =(ss / max(ne,1.0)) - av * av + print('\n Realization ' + str(isim) + ': number = ' + str(ne)) + print(' mean = ' + str(round(av,4)) + ' (close to 0.0?)') + print(' variance = ' + str(round(ss,4)) + ' (close to gammabar(V,V)? approx. 
1.0)')
 
 # END MAIN LOOP OVER SIMULATIONS:
 
-    sim_out = np.zeros((ny,nx))
-    for ind in range(0,nxy):
-        iy = int((ind)/nx)
-        ix = ind - (iy)*nx
-        sim_out[ny-iy-1,ix] = sim[ind]
+        sim_out = np.zeros((ny,nx))
+        for ind in range(0,nxy):
+            iy = int((ind)/nx)
+            ix = ind - (iy)*nx
+            sim_out[ny-iy-1,ix] = sim[ind]
+    return sim_out
 
-    return sim_out
-
-
-def kb2d_locations(
-    df,
-    xcol,
-    ycol,
-    vcol,
-    tmin,
-    tmax,
-    df_loc,
-    xcol_loc,
-    ycol_loc,
-    ndmin,
-    ndmax,
-    radius,
-    ktype,
-    skmean,
-    vario,
-):
-    """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the
+def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,
+          ndmax,nodmax,mults,nmult,noct,radius,ktype,vario):
+
+    """A 2D version of GSLIB's SISIM Indicator Simulation program (Deutsch and Journel, 1998) converted from the
     original Fortran to Python by Michael Pyrcz, the University of Texas at
-    Austin (Jan, 2019). Version for kriging at a set of spatial locations.
+    Austin (March, 2019). WARNING: only tested for categorical variables with ktype 0, 1 and 2 (locally variable proportion).
     :param df: pandas DataFrame with the spatial data
     :param xcol: name of the x coordinate column
     :param ycol: name of the y coordinate column
-    :param vcol: name of the property column
+    :param vcol: name of the property column (categorical or continuous - note continuous is untested)
+    :param ivtype: variable type, 0 - categorical, 1 - continuous
+    :param koption: kriging option, 0 - estimation, 1 - cross validation (under construction)
+    :param ncut: number of categories or continuous thresholds
+    :param thresh: an ndarray with the category labels or continuous thresholds
+    :param gcdf: global CDF, not used if trend is present
+    :param trend: an ndarray [ny,nx,ncut] with the local trend proportions or cumulative CDF values
     :param tmin: property trimming limit
     :param tmax: property trimming limit
-    :param df_loc: pandas DataFrame with the locations to krige
-    :param xcol: name of the x coordinate column for locations to krige
-    :param ycol: name of the y coordinate column for locations to krige
+    :param nx: definition of the grid system (x axis)
+    :param xmn: definition of the grid system (x axis)
+    :param xsiz: definition of the grid system (x axis)
+    :param ny: definition of the grid system (y axis)
+    :param ymn: definition of the grid system (y axis)
+    :param ysiz: definition of the grid system (y axis)
     :param ndmin: minimum number of data points to use for kriging a block
     :param ndmax: maximum number of data points to use for kriging a block
     :param radius: maximum isotropic search radius
-    :param ktype:
-    :param skmean:
-    :param vario:
+    :param ktype: kriging type, 0 - simple kriging and 1 - ordinary kriging
+    :param vario: list with all of the indicator variograms (sill of 1.0) in consistent order with the thresholds above
     :return:
     """
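Editor's note: a sketch of how a call to this routine might look for a two-category problem. All data values, column names, and variogram parameters below are illustrative placeholders; the vario dictionaries carry the keys ('nst', 'nug', 'cc1', 'it1', 'azi1', 'hmaj1', 'hmin1', ...) that the loader further down expects:

    import numpy as np
    import pandas as pd

    # Tiny illustrative conditioning data set
    df = pd.DataFrame({'X': [100.0, 500.0, 900.0],
                       'Y': [900.0, 500.0, 100.0],
                       'Facies': [0.0, 1.0, 0.0]})

    # One indicator variogram per category, each with a total sill of 1.0
    vario = [dict(nst=1, nug=0.0, cc1=1.0, it1=1, azi1=0.0, hmaj1=300.0, hmin1=150.0)
             for _ in range(2)]
    trend = np.zeros((1, 1, 1))  # dummy; real trends are shaped (ny, nx, ncut)

    sim = sisim(df, 'X', 'Y', 'Facies', ivtype=0, koption=0, ncut=2,
                thresh=[0, 1], gcdf=[0.6, 0.4], trend=trend,
                tmin=-998, tmax=9999, zmin=0.0, zmax=1.0,
                ltail=1, ltpar=1, middle=1, mpar=1, utail=1, utpar=1,
                nx=100, xmn=5.0, xsiz=10.0, ny=100, ymn=5.0, ysiz=10.0,
                seed=73073, ndmin=0, ndmax=8, nodmax=8, mults=1, nmult=2,
                noct=-1, radius=250.0, ktype=0, vario=vario)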
-
-# Constants
-    UNEST = -999.
-    EPSLON = 1.0e-10
-    VERSION = 2.907
-    first = True
-    PMX = 9999.0
+# Checks
+    if utail == 3 or ltail == 3 or middle == 3:
+        print('ERROR - distribution extrapolation option 3 with table is not available')
+        return None
+    if xcol == "" or ycol == "":
+        print('ERROR - must have x and y columns in the DataFrame')
+        return None
+
+# Set constants and parameters (from the original Fortran include file):
+    UNEST = -99.0
+    EPSLON = 1.0e-20
+    VERSION = 0.001
+    np.random.seed(seed)
+    colocorr = 0.0 # no collocated cokriging
+    lvm = 0 # no kriging with a locally variable mean
+    sec = []; sec = np.array(sec) # no secondary data
+    ng = 0 # no tabulated values
+
+# Find the needed parameters:
+    PMX = 9999.9
     MAXSAM = ndmax + 1
-    MAXKD = MAXSAM + 1
-    MAXKRG = MAXKD * MAXKD
-
+    MAXEQ = MAXSAM + 1
+    nxy = nx*ny
+    mik = 0  # full indicator kriging
+    use_trend = False
+    trend1d = np.zeros((nxy,1)) # no trend; make a dummy trend
+    if trend.shape[0] == ny and trend.shape[1] == nx and trend.shape[2] == ncut:
+        trend1d = np.zeros((nxy,ncut))
+        use_trend = True
+        index = 0
+        for iy in range(0,ny):
+            for ix in range(0,nx):
+                for ic in range(0,ncut):
+                    trend1d[index,ic] = trend[ny-iy-1,ix,ic] # copy trend
+                index = index + 1
+
+    MAXORD = nxy
+    MAXNOD = nodmax
+
+    cnodeiv = np.zeros((ncut+1,MAXNOD))
+
+    tmp = np.zeros(MAXORD)
+    sstrat = 0 # search data and nodes by default, turned off if unconditional
+    sang1 = 0 # using isotropic search now
+    sanis1 = 1.0
+# No covariance lookup table
+    mxctx = int(radius/xsiz)*2+1; mxcty = int(radius/xsiz)*2+1
+# print('cov table / spiral search nx, ny '); print(mxctx); print(mxcty)
+    MAXCTX = mxctx
+    MAXCTY = mxcty
+    MAXCXY = MAXCTX * MAXCTY
+
+# Grid extents
+    MAXX = nx
+    MAXY = ny
+    MXY = MAXX * MAXY
+
+# Kriging system
+    MAXKR1 = 2 * MAXNOD + 2 * MAXSAM + 1
+    MAXKR2 = MAXKR1 * MAXKR1
+    MAXSBX = 1
+    if nx > 1:
+        MAXSBX = int(nx/2)
+        if MAXSBX > 50:
+            MAXSBX = 50
+    MAXSBY = 1
+    if ny > 1:
+        MAXSBY = int(ny/2)
+        if MAXSBY > 50:
+            MAXSBY = 50
+# print('ncut'); print(ncut)
+
+# load the variogram
+    MAXNST = 2
+    nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST))
+    aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int)
+    ang = np.zeros((ncut,MAXNST)); anis = np.zeros((ncut,MAXNST))
+
+# print('varios - 1 vario'); print(vario[1])
+
+    for icut in range(0,ncut):
+# print('icut'); print(icut)
+        nst[icut] = int(vario[icut]['nst'])
+        c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1'];
+        ang[icut,0] = vario[icut]['azi1'];
+        aa[icut,0] = vario[icut]['hmaj1']; anis[icut,0] = vario[icut]['hmin1']/vario[icut]['hmaj1'];
+        if nst[icut] == 2:
+            cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2'];
+            aa[icut,1] = vario[icut]['hmaj2']; anis[icut,1] = vario[icut]['hmin2']/vario[icut]['hmaj2'];
+
+# print('check loaded cov model- icut '); print(icut)
+# print(cc[icut],aa[icut],it[icut],ang[icut],anis[icut])
+
+# Load the data
+    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)]    # trim values outside tmin and tmax
+    MAXDAT = 
len(df_extract) + nd = MAXDAT + MAXCUT = ncut + MAXNST = 2 + MAXROT = MAXNST*MAXCUT+ 1 + ikout = np.zeros((nx,ny,ncut)) + maxcov = np.zeros(ncut) + + # Allocate the needed memory: xa = np.zeros(MAXSAM) ya = np.zeros(MAXSAM) vra = np.zeros(MAXSAM) dist = np.zeros(MAXSAM) nums = np.zeros(MAXSAM) - r = np.zeros(MAXKD) - rr = np.zeros(MAXKD) - s = np.zeros(MAXKD) - a = np.zeros(MAXKRG) - klist = np.zeros(len(df_loc)) # list of kriged estimates - vlist = np.zeros(len(df_loc)) + r = np.zeros(MAXEQ) + rr = np.zeros(MAXEQ) + s = np.zeros(MAXEQ) + a = np.zeros(MAXEQ*MAXEQ) + ikmap = np.zeros((nx,ny,ncut)) + vr = np.zeros((MAXDAT,MAXCUT+1)) + + nviol = np.zeros(MAXCUT) + aviol = np.zeros(MAXCUT) + xviol = np.zeros(MAXCUT) + + ccdf = np.zeros(ncut) + ccdfo = np.zeros(ncut) + ikout = np.zeros((nx,ny,ncut)) -# Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax - nd = len(df_extract) - ndmax = min(ndmax,nd) x = df_extract[xcol].values y = df_extract[ycol].values - vr = df_extract[vcol].values - -# Load the estimation loactions - nd_loc = len(df_loc) - x_loc = df_loc[xcol].values - y_loc = df_loc[ycol].values - vr_loc = df_loc[vcol].values - -# Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + v = df_extract[vcol].values -# Summary statistics for the data after trimming - avg = vr.mean() - stdev = vr.std() - ss = stdev**2.0 - vrmin = vr.min() - vrmax = vr.max() + MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used + gcut = np.zeros(MAXTAB) -# Initialize accumulators: - cbb = 0.0 - rad2 = radius*radius +# The indicator data are constructed knowing the thresholds and the +# data value. -# Calculate Block Covariance. Check for point kriging. 
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) -# Keep this value to use for the unbiasedness constraint: - unbias = cov - cbb = cov - first = False +# print('ncut'); print(ncut) + if ivtype == 0: + for icut in range(0,ncut): + vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + else: + for icut in range(0,ncut): + vr[:,icut] = np.where(v <= thresh[icut], '1', '0') + vr[:,ncut] = v -# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: - nk = 0 - ak = 0.0 - vk = 0.0 - - for idata in range(len(df_loc)): - print('Working on location ' + str(idata)) - xloc = x_loc[idata] - yloc = y_loc[idata] - current_node = (yloc,xloc) - -# Find the nearest samples within each octant: First initialize -# the counter arrays: - na = -1 # accounting for 0 as first index - dist.fill(1.0e+20) - nums.fill(-1) - dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search - # remove any data outside search radius - na = len(dist) - nums = nums[dist UNEST: - nk = nk + 1 - ak = ak + est - vk = vk + est*est +# print('spiral search number nodes '); print(nlooku) +# print('ixnode,iynode'); print(ixnode,iynode) +# Initialize accumulators: # not setup yet + nk = 0 + xk = 0.0 + vk = 0.0 + for icut in range (0,ncut): + nviol[icut] = 0 + aviol[icut] = 0.0 + xviol[icut] = -1.0 -# END OF MAIN LOOP OVER ALL THE BLOCKS: +# print('Working on the kriging') - if nk >= 1: - ak = ak / float(nk) - vk = vk/float(nk) - ak*ak - print(' Estimated ' + str(nk) + ' blocks ') - print(' average ' + str(ak) + ' variance ' + str(vk)) +# Report on progress from time to time: + if koption == 0: + nxy = nx*ny + nloop = nxy + irepo = max(1,min((nxy/10),10000)) + else: + nloop = 10000000 + irepo = max(1,min((nd/10),10000)) + ddh = 0.0 - return klist, vlist - -#Partial Correlation in Python (clone of Matlab's partialcorr) +# MAIN LOOP OVER ALL THE SIMULAUTIONS: +# for isim in range(0,nsim): # will add multiple realizations soon -#This uses the linear regression approach to compute the partial correlation -#(might be slow for a huge number of variables). 
The algorithm is detailed here:
 
+# Work out a random path for this realization:
+    sim = np.random.rand(nx*ny)
+    order = np.zeros(nxy)
+    ind = 0
+    for ixy in range(0,nxy):
+        order[ixy] = ind
+        ind = ind + 1
 
-# http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression
+# Multiple grid search works with multiples of 4 (yes, that is
+# somewhat arbitrary):
 
-#Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y},
-#the algorithm can be summarized as
-#    1) perform a normal linear least-squares regression with X as the target and Z as the predictor
-#    2) calculate the residuals in Step #1
-#    3) perform a normal linear least-squares regression with Y as the target and Z as the predictor
-#    4) calculate the residuals in Step #3
-#    5) calculate the correlation coefficient between the residuals from Steps #2 and #4;
-#    The result is the partial correlation between X and Y while controlling for the effect of Z
+    if mults == 1:
+        for imult in range(0,nmult):
+            nny = int(max(1,ny/((imult+1)*4)))
+            nnx = int(max(1,nx/((imult+1)*4)))
+# print('multi grid - nnx, nny'); print(nnx,nny)
+            jy = 1
+            jx = 1
+            for iy in range(0,nny):
+                if nny > 0: jy = iy*(imult+1)*4
+                for ix in range(0,nnx):
+                    if nnx > 0: jx = ix*(imult+1)*4
+                    index = jx + (jy-1)*nx
+                    sim[index] = sim[index] - (imult+1)
+
+
+# Initialize the simulation:
+    sim, order = dsortem(0,nxy,sim,2,b=order)
+    sim.fill(UNEST)
+    tmp.fill(0.0)
+    print('Working on a single realization, seed ' + str(seed))
+# print('Random Path'); print(order)
 
-#Date: Nov 2014
-#Author: Fabian Pedregosa-Izquierdo, f@bianp.net
-#Testing: Valentina Borghesani, valentinaborghesani@gmail.com
+# Assign the data to the closest grid node:
 
-def partial_corr(C):
-#    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling
-#    for the remaining variables in C.
+    TINY = 0.0001
+    for idd in range(0,nd):
+# print('data'); print(x[idd],y[idd])
+        ix = getindex(nx,xmn,xsiz,x[idd])
+        iy = getindex(ny,ymn,ysiz,y[idd])
+        ind = ix + (iy-1)*nx
+        xx = xmn + (ix)*xsiz
+        yy = ymn + (iy)*ysiz
+# print('xx, yy' + str(xx) + ',' + str(yy))
+        test = abs(xx-x[idd]) + abs(yy-y[idd])
 
-# Parameters
-# C : array-like, shape (n, p)
-#    Array with the different variables. Each column of C is taken as a variable
-# Returns
-# P : array-like, shape (p, p)
-#    P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling
-#    for the remaining variables in C.
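Editor's note on the random-path construction above: drawing a random key per node, subtracting (imult+1) at the coarse multiple-grid nodes, and then sorting guarantees the coarse grid is visited first. A self-contained sketch of the same ordering trick, with the Fortran-style index arithmetic simplified to plain 0-based row-major indexing (the helper name is illustrative, not part of the package):

    import numpy as np

    def multigrid_random_path(nx, ny, nmult, rng):
        # Random priority per node; argsort of the keys gives the visiting order
        key = rng.random(nx * ny)
        for imult in range(nmult):
            spacing = (imult + 1) * 4              # multiple-grid spacing
            for iy in range(0, ny, spacing):
                for ix in range(0, nx, spacing):
                    key[ix + iy * nx] -= (imult + 1)   # coarse nodes get smaller keys
        return np.argsort(key)                     # coarse-grid nodes come first

    rng = np.random.default_rng(73073)
    print(multigrid_random_path(8, 8, 2, rng)[:8])  # first visits lie on the coarsest grid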
+# As this data to the node (unless there is a closer data): + if sstrat == 1 or (sstrat == 0 and test <= TINY): + if sim[ind] > UNEST: + id2 = int(sim[ind]+0.5) + test2 = abs(xx-x[id2]) + abs(yy-y[id2]) + if test <= test2: + sim[ind] = idd + else: + sim[ind] = idd - C = np.asarray(C) - p = C.shape[1] - P_corr = np.zeros((p, p), dtype=np.float) - for i in range(p): - P_corr[i, i] = 1 - for j in range(i+1, p): - idx = np.ones(p, dtype=np.bool) - idx[i] = False - idx[j] = False - beta_i = linalg.lstsq(C[:, idx], C[:, j])[0] - beta_j = linalg.lstsq(C[:, idx], C[:, i])[0] - res_j = C[:, j] - C[:, idx].dot( beta_i) - res_i = C[:, i] - C[:, idx].dot(beta_j) - corr = stats.pearsonr(res_i, res_j)[0] - P_corr[i, j] = corr - P_corr[j, i] = corr - return P_corr +# As a flag so that this node does not get simulated: -def semipartial_corr(C): # Michael Pyrcz modified the function above by Fabian Pedregosa-Izquierdo, f@bianp.net for semipartial correlation - C = np.asarray(C) - p = C.shape[1] - P_corr = np.zeros((p, p), dtype=np.float) - for i in range(p): - P_corr[i, i] = 1 - for j in range(i+1, p): - idx = np.ones(p, dtype=np.bool) - idx[i] = False - idx[j] = False - beta_i = linalg.lstsq(C[:, idx], C[:, j])[0] - res_j = C[:, j] - C[:, idx].dot( beta_i) - res_i = C[:, i] # just use the value, not a residual - corr = stats.pearsonr(res_i, res_j)[0] - P_corr[i, j] = corr - P_corr[j, i] = corr - return P_corr -def sqdist3(x1,y1,z1,x2,y2,z2,ind,rotmat): - """Squared Anisotropic Distance Calculation Given Matrix Indicator - 3D - - This routine calculates the anisotropic distance between two points - given the coordinates of each point and a definition of the - anisotropy. - - Converted from original fortran GSLIB (Deutsch and Journel, 1998) to Python by Wendi Liu, University of Texas at Austin - - INPUT VARIABLES: - - x1,y1,z1 Coordinates of first point - x2,y2,z2 Coordinates of second point - ind The rotation matrix to use - rotmat The rotation matrices""" - - dx = x1 - x2 - dy = y1 - y2 - dz = z1 - z2 - sqdist = 0.0 - for i in range(3): - cont = rotmat[ind, i, 0] * dx + rotmat[ind, i, 1] * dy + rotmat[ind, i, 2] * dz - sqdist += cont**2 - return sqdist +# Another data values into the simulated grid: + for ind in range(0,nxy): + idd = int(sim[ind]+0.5) + if idd > 0: + sim[ind] = vr[idd] + else: + tmp[ind] = sim[ind] + sim[ind] = UNEST + irepo = max(1,min((nxy/10),10000)) -def setrot3(ang1,ang2,ang3,anis1,anis2,ind,rotmat): - """Sets up an Anisotropic Rotation Matrix - 3D - - Sets up the matrix to transform cartesian coordinates to coordinates - accounting for angles and anisotropy - - Converted from original fortran GSLIB (Deutsch and Journel, 1998) to Python by Wendi Liu, University of Texas at Austin - - INPUT PARAMETERS: - - ang1 Azimuth angle for principal direction - ang2 Dip angle for principal direction - ang3 Third rotation angle - anis1 First anisotropy ratio - anis2 Second anisotropy ratio - ind matrix indicator to initialize - rotmat rotation matrices - - Converts the input angles to three angles which make more mathematical sense: - - alpha angle between the major axis of anisotropy and the - E-W axis. Note: Counter clockwise is positive. - beta angle between major axis and the horizontal plane. 
- (The dip of the ellipsoid measured positive down) - theta Angle of rotation of minor axis about the major axis - of the ellipsoid.""" - - DEG2RAD=np.pi/180.0; EPSLON=1e-20 - if (ang1 >= 0.0)&(ang1<270.0): - alpha = (90.0 - ang1) * DEG2RAD - else: - alpha = (450.0 - ang1) * DEG2RAD - beta = -1.0 * ang2 *DEG2RAD - theta = ang3 * DEG2RAD - - sina = np.sin(alpha) - sinb = np.sin(beta) - sint = np.sin(theta) - cosa = np.cos(alpha) - cosb = np.cos(beta) - cost = np.cos(theta) - ### Construct the rotation matrix in the required memory - - afac1 = 1.0/max(anis1, EPSLON) - afac2 = 1.0/max(anis2, EPSLON) - rotmat[ind,0,0] = cosb * cosa - rotmat[ind,0,1] = cosb * sina - rotmat[ind,0,2] = -sinb - rotmat[ind,1,0] = afac1*(-cost*sina + sint*sinb*cosa) - rotmat[ind,1,1] = afac1*(cost*cosa + sint*sinb*sina) - rotmat[ind,1,2] = afac1*( sint * cosb) - rotmat[ind,2,0] = afac2*(sint*sina + cost*sinb*cosa) - rotmat[ind,2,1] = afac2*(-sint*cosa + cost*sinb*sina) - rotmat[ind,2,2] = afac2*(cost * cosb) - - return rotmat +# LOOP OVER ALL THE NODES: + for ind in range(0,nxy): + if (int(ind/irepo)*irepo) == ind: + print(' currently on node ' + str(ind)) -def gammabar(xsiz, ysiz, zsiz,nst,c0,it,cc,hmaj,hmin,hvert): - """This program calculates the gammabar value from a 3D semivariogram model""" - """Converted from original fortran GSLIB (Deutsch and Journel, 1998) to Python by Wendi Liu, University of Texas at Austin""" - - ###Initialization - rotmat = np.zeros((5, 3, 3)) - EPSLON = 1.0e-20 - MAXNST=4 - maxcov=1.0 - cmax = c0 - nx = 3 - ny = 3 - nz = 6 - ang1 = np.zeros((MAXNST,)) #azimuth - ang2 = np.ones((MAXNST,))*90.0 #dip - ang3 = np.zeros((MAXNST,)) #plenge - anis1 = np.zeros((MAXNST,)) - anis2 = np.zeros((MAXNST,)) - - for i in range(nst): - anis1[i] = hmin[i]/max(hmaj[i],EPSLON) - anis2[i] = hvert[i]/max(hmaj[i],EPSLON) - rotmat = setrot3(ang1[i],ang2[i],ang3[i],anis1[i],anis2[i],i,rotmat) - cmax,cov = cova3(0.0,0.0,0.0,0.0,0.0,0.0,nst,c0,it,cc,hmaj,rotmat,cmax) - ##Discretization parameters - xsz = xsiz/nx - xmn = xsz/2.0 - xzero = xsz * 0.0001 - ysz = ysiz/ny - ymn = ysz/2.0 - yzero = ysz * 0.0001 - zsz = zsiz/nz - zmn = zsz/2.0 - zzero = zsz * 0.0001 - ##Calculate Gammabar - gb = 0.0 - for ix in range(nx): - xxi = xmn +(ix-1)*xsz+xzero - for jx in range(nx): - xxj = xmn +(jx-1)*xsz - for iy in range(ny): - yyi = ymn +(iy-1)*ysz+yzero - for jy in range(ny): - yyj = ymn +(jy-1)*ysz - for iz in range(nz): - zzi = zmn +(iz-1)*zsz+zzero - for jz in range(nz): - zzj = zmn +(jz-1)*zsz - cmax,cov = cova3(xxi,yyi,zzi,xxj,yyj,zzj,nst,c0,it,cc,hmaj,rotmat,cmax) - gb += maxcov-cov - gb = gb/((nx*ny*nz)**2) - return gb - -def gam_3D(array, tmin, tmax, xsiz, ysiz, zsiz, ixd, iyd, izd, nlag, isill): - """GSLIB's GAM program (Deutsch and Journel, 1998) converted from the - original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (Nov, 2019). 
- :param array: 2D gridded data / model - :param tmin: property trimming limit - :param tmax: property trimming limit - :param xsiz: grid cell extents in x direction - :param ysiz: grid cell extents in y direction - :param zsiz: grid cell extents in z direction - :param ixd: lag offset in grid cells - :param iyd: lag offset in grid cells - :param izd: lag offset in grid cells - :param nlag: number of lags to calculate - :param isill: 1 for standardize sill - :return: TODO - """ - if array.ndim ==3: - nz, ny, nx = array.shape - elif array.ndim == 2: - ny, nx = array.shape - elif array.ndim == 1: - ny, nx = 1, len(array) +# Find the index on the random path, check if assigned data and get location - nvarg = 1 # for multiple variograms repeat the program - nxyz = nx * ny * nz # TODO: not used - mxdlv = nlag + index = int(order[ind]+0.5) + if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue + iy = int((index)/nx) + ix = index - (iy)*nx + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz + current_node = (yy,xx) +# print('Current_node'); print(current_node) - # Allocate the needed memory - lag = np.zeros(mxdlv) - vario = np.zeros(mxdlv) - hm = np.zeros(mxdlv) - tm = np.zeros(mxdlv) - hv = np.zeros(mxdlv) # TODO: not used - npp = np.zeros(mxdlv) - ivtail = np.zeros(nvarg + 2) - ivhead = np.zeros(nvarg + 2) - ivtype = np.zeros(nvarg + 2) - ivtail[0] = 0 - ivhead[0] = 0 - ivtype[0] = 0 +# Now we'll simulate the point ix,iy,iz. First, get the close data +# and make sure that there are enough to actually simulate a value, +# we'll only keep the closest "ndmax" data, and look for previously +# simulated grid nodes: - # Summary statistics for the data after trimming - inside = (array > tmin) & (array < tmax) - avg = array[(array > tmin) & (array < tmax)].mean() # TODO: not used - stdev = array[(array > tmin) & (array < tmax)].std() - var = stdev ** 2.0 - vrmin = array[(array > tmin) & (array < tmax)].min() # TODO: not used - vrmax = array[(array > tmin) & (array < tmax)].max() # TODO: not used - num = ((array > tmin) & (array < tmax)).sum() # TODO: not used + if sstrat == 0: +# print('searching for nearest data') + na = -1 # accounting for 0 as first index + if ndmax == 1: + dist = np.zeros(1); nums = np.zeros(1) + dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + else: + dist, nums = tree.query(current_node,ndmax) + # remove any data outside search radius - # For the fixed seed point, loop through all directions - for iz in range(0, nz): - for iy in range(0, ny): - for ix in range(0, nx): - if inside[iz, iy, ix]: - vrt = array[iz, iy, ix] - ixinc = ixd - iyinc = iyd - izinc = izd - ix1 = ix - iy1 = iy - iz1 = iz - for il in range(0, nlag): - ix1 = ix1 + ixinc - if 0 <= ix1 < nx: - iy1 = iy1 + iyinc - if 1 <= iy1 < ny: - if 1 <= iz1 < nz: - if inside[iz1, iy1, ix1]: - vrh = array[iz1, iy1, ix1] - npp[il] = npp[il] + 1 - tm[il] = tm[il] + vrt - hm[il] = hm[il] + vrh - vario[il] = vario[il] + ((vrh - vrt) ** 2.0) - - # Get average values for gam, hm, tm, hv, and tv, then compute the correct - # "variogram" measure - for il in range(0, nlag): - if npp[il] > 0: - rnum = npp[il] - lag[il] = np.sqrt((ixd * xsiz * il) ** 2 + (iyd * ysiz * il) ** 2 + (izd * zsiz * il) ** 2) - vario[il] = vario[il] / float(rnum) - hm[il] = hm[il] / float(rnum) - tm[il] = tm[il] / float(rnum) +# print('nums'); print(nums) +# print('dist'); print(dist) + na = len(dist) + nums = nums[dist 0: + for icut in range(0,ncut): + cnodeiv[icut,:] = np.where((cnodev <= 
thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') + else: + for icut in range(0,ncut): + cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') + cnodeiv[ncut,:] = cnodev -def make_variogram_3D( - nug, - nst, - it1, - cc1, - azi1, - dip1, - hmax1, - hmed1, - hmin1, - it2=1, - cc2=0, - azi2=0, - dip2=0, - hmax2=0, - hmed2=0, - hmin2=0, -): - """Make a dictionary of variogram parameters for application with spatial - estimation and simulation. - - :param nug: Nugget constant (isotropic) - :param nst: Number of structures (up to 2) - :param it1: Structure of 1st variogram (1: Spherical, 2: Exponential, 3: Gaussian) - :param cc1: Contribution of 2nd variogram - :param azi1: Azimuth of 1st variogram - :param dip1: Dip of 1st variogram - :param hmax1: Range in major direction (Horizontal) - :param hmed1: Range in minor direction (Horizontal) - :param hmin1: Range in vertical direction - :param it2: Structure of 2nd variogram (1: Spherical, 2: Exponential, 3: Gaussian) - :param cc2: Contribution of 2nd variogram - :param azi2: Azimuth of 2nd variogram - :param dip1: Dip of 2nd variogram - :param hmax2: Range in major direction (Horizontal) - :param hmed2: Range in minor direction (Horizontal) - :param hmin2: Range in vertical direction - :return: TODO - """ - if cc2 == 0: - nst = 1 - var = dict( - [ - ("nug", nug), - ("nst", nst), - ("it1", it1), - ("cc1", cc1), - ("azi1", azi1), - ("dip1", dip1), - ("hmax1", hmax1), - ("hmed1", hmed1), - ("hmin1", hmin1), - ("it2", it2), - ("cc2", cc2), - ("azi2", azi2), - ("dip2", dip2), - ("hmax2", hmax2), - ("hmed2", hmed2), - ("hmin2", hmin2), - ] - ) - if nug + cc1 + cc2 != 1: - print( - "\x1b[0;30;41m make_variogram Warning: " - "sill does not sum to 1.0, do not use in simulation \x1b[0m" - ) - if ( - cc1 < 0 - or cc2 < 0 - or nug < 0 - or hmax1 < 0 - or hmax2 < 0 - or hmin1 < 0 - or hmin2 < 0 - ): - print( - "\x1b[0;30;41m make_variogram Warning: " - "contributions and ranges must be all positive \x1b[0m" - ) - if hmax1 < hmed1 or hmax2 < hmed2: - print( - "\x1b[0;30;41m make_variogram Warning: " - "major range should be greater than minor range \x1b[0m" - ) - return var +# print('indicator transformed nearest nodes'); print(cnodeiv) -def vmodel_3D( - nlag, - xlag, - azm, - dip, - vario -): - """GSLIB's VMODEL program (Deutsch and Journel, 1998) converted from the - original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (Nov, 2019). 
- :param nlag: number of variogram lags - :param xlag: size of the lags - :param axm: direction by 3D azimuth, 000 is y positive, 090 is x positive - :param dip: direction by 3D dip, 000 is horizontal to x-y plane, 090 is perpendicular to x-y plane - :param vario: dictionary with the variogram parameters - :return: - """ - -# Parameters - MAXNST=4 - DEG2RAD=3.14159265/180.0 - MAXROT=MAXNST+1 - EPSLON = 1.0e-20 - VERSION= 1.01 - -# Declare arrays - index = np.zeros(nlag+1) - h = np.zeros(nlag+1) - gam = np.zeros(nlag+1) - cov = np.zeros(nlag+1) - ro = np.zeros(nlag+1) - -# Load the variogram - nst = vario["nst"] - cc = np.zeros(nst) - aa = np.zeros(nst) - it = np.zeros(nst) - ang_azi = np.zeros(nst) - ang_dip = np.zeros(nst) - anis = np.zeros(nst) - anis_v = np.zeros(nst) - - c0 = vario["nug"] - cc[0] = vario["cc1"] - it[0] = vario["it1"] - ang_azi[0] = vario["azi1"] - ang_dip[0] = vario["dip1"] - aa[0] = vario["hmax1"] - anis[0] = vario["hmed1"] / vario["hmax1"] - anis_v[0] = vario["hmin1"] / vario["hmax1"] - if nst == 2: - cc[1] = vario["cc2"] - it[1] = vario["it2"] - ang_azi[1] = vario["azi2"] - ang_dip[1] = vario["dip2"] - aa[1] = vario["hmax2"] - anis[1] = vario["hmed2"] / vario["hmax2"] - anis_v[1] = vario["hmin2"] / vario["hmax2"] - - xoff = math.sin(DEG2RAD*azm)*math.cos(DEG2RAD*dip)*xlag - yoff = math.cos(DEG2RAD*azm)*math.cos(DEG2RAD*dip)*xlag - zoff = math.sin(DEG2RAD*dip)*xlag - - print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff) + ',' + str(zoff)) - rotmat, maxcov = setup_rotmat_3D(c0, nst, it, cc, ang_azi, ang_dip, 99999.9) - - xx = 0.0; yy = 0.0; zz = 0.0; - - for il in range(0,nlag+1): - index[il] = il - cov[il] = cova3(0.0,0.0,0.0,xx,yy,zz,nst,c0,9999.9,cc,aa,it,anis, anis_v, rotmat, maxcov) - gam[il] = maxcov - cov[il] - ro[il] = cov[il]/maxcov - h[il] = math.sqrt(max((xx*xx + yy*yy + zz*zz),0.0)) - xx = xx + xoff - yy = yy + yoff - zz = zz + zoff +# print('srchnd'); print(ncnode,icnode,cnodev,cnodex,cnodey) +# print('Result of srchnd, cnodex = '); print(cnodex) + nclose = na +# print('*****srch node, nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) +# print('near data'); print(nums) +# print('near data distance'); print(dist) + # print('nums'); print(nums) -# finished - return index,h,gam,cov,ro +# What cdf value are we looking for? + zval = UNEST + cdfval = np.random.rand() -@jit(nopython=True) -def setup_rotmat_3D(c0, nst, it, cc, ang_azi, ang_dip, pmx): - """Setup rotation matrix. - :param c0: nugget constant (isotropic) - :param nst: number of nested structures (max. 
4) - :param it: Variogram shapes (i.e., Gaussian, Exponential, Spherical) of each nested structure - :param cc: multiplicative factor of each nested structure - :param ang_azi: azimuths of each nested structure - :param ang_dip: dips of each nested structure - :param pmx: TODO - :return: TODO - """ - PI = 3.141_592_65 - DTOR = PI / 180.0 - - # The first time around, re-initialize the cosine matrix for the variogram - # structures - rotmat = np.zeros((9, nst)) - maxcov = c0 - for js in range(0, nst): - azmuth = (90.0 + ang_azi[js]) * DTOR - dip = (ang_dip[js]) * DTOR - rotmat[0, js] = math.cos(azmuth) - rotmat[1, js] = -1 * math.sin(azmuth) - rotmat[2, js] = 0 - rotmat[3, js] = math.cos(dip) * math.sin(azmuth) - rotmat[4, js] = math.cos(dip) * math.cos(azmuth) - rotmat[5, js] = -1 * math.sin(dip) - rotmat[6, js] = math.sin(dip) * math.sin(azmuth) - rotmat[7, js] = math.sin(dip) * math.cos(azmuth) - rotmat[8, js] = math.cos(dip) - if it[js] == 4: - maxcov = maxcov + pmx +# Use the global distribution? +# check inputs + # print('nst'); print(nst) + + + + if nclose + ncnode <= 0: +# print('nclose & ncnode'); print(nclose, ncnode) + zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) else: - maxcov = maxcov + cc[js] - return rotmat, maxcov +# print('kriging') +# Estimate the local distribution by indicator kriging: +# print('maxcov'); print(maxcov) + for ic in range(0,ncut): +# print('check kriging cov model- icut '); print(ic) +# print('node data values for kriging'); print(cnodev) +# print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) + #ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) + if ktype == 0: + gmean = gcdf[ic] + elif ktype == 2: + gmean = trend1d[index,ic] + else: + gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used +# print('gmean'); print(gmean) + ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, + icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], + rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) + # print('ccdf'); print(ccdf) -@jit(nopython=True) -def cova3(x1, y1, z1, x2, y2, z2, nst, c0, pmx, cc, aa, it, anis, anis_v, rotmat, maxcov): - """Calculate the covariance associated with a variogram model specified by a - nugget effect and nested variogram structures. 
- :param x1: x coordinate of first point - :param y1: y coordinate of first point - :param z1: z coordinate of first point - :param x2: x coordinate of second point - :param y2: y coordinate of second point - :param z2: z coordinate of second point - :param nst: number of nested structures (maximum of 4) - :param c0: isotropic nugget constant (TODO: not used) - :param pmx: TODO - :param cc: multiplicative factor of each nested structure - :param aa: parameter `a` of each nested structure - :param it: TODO - :param ang: TODO: not used - :param anis: Horizontal aspect ratio - :param anis_v: Vertical aspect ratio - :param rotmat: rotation matrices - :param maxcov: TODO - :return: TODO - """ - """ Revised from Wendi Liu's code """ +# Correct order relations: + ccdfo = ordrel(ivtype,ncut,ccdf) +# Draw from the local distribution: + zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + sim[index] = zval + # print('zval'); print(zval) - EPSLON = 0.000001 - # Check for very small distance - dx = x2 - x1 - dy = y2 - y1 - dz = z2 - z1 - if (dx * dx + dy * dy + dz * dz) < EPSLON: - cova3_ = maxcov - return cova3_ +# END MAIN LOOP OVER SIMULATIONS: + sim_out = np.zeros((ny,nx)) + for ind in range(0,nxy): + iy = int((ind)/nx) + ix = ind - (iy)*nx + sim_out[ny-iy-1,ix] = sim[ind] - # Non-zero distance, loop over all the structures - cova3_ = 0.0 - for js in range(0, nst): - # Compute the appropriate structural distance - dx1 = dx * rotmat[0, js] + dy * rotmat[1, js] + dz * rotmat[2, js] - dy1 = (dx * rotmat[3, js] + dy * rotmat[4, js] + dz * rotmat[5, js] ) / anis[js] - dz1 = (dx * rotmat[6, js] + dy * rotmat[7, js] + dz * rotmat[8, js] ) / anis_v[js] - - - h = math.sqrt(max((dx1 * dx1 + dy1 * dy1 + dz1 * dz1 ), 0.0)) - if it[js] == 1: - # Spherical model - hr = h / aa[js] - if hr < 1.0: - cova3_ = cova3_ + cc[js] * (1.0 - hr * (1.5 - 0.5 * hr * hr)) - elif it[js] == 2: - # Exponential model - cova3_ = cova3_ + cc[js] * np.exp(-3.0 * h / aa[js]) - elif it[js] == 3: - # Gaussian model - hh = -3.0 * (h * h) / (aa[js] * aa[js]) - cova3_ = cova3_ + cc[js] * np.exp(hh) - elif it[js] == 4: - # Power model - cov1 = pmx - cc[js] * (h ** aa[js]) - cova3_ = cova3_ + cov1 - return cova3_ - + return sim_out - - -def gamv_3D( +def kb2d_locations( df, xcol, ycol, - zcol, vcol, tmin, tmax, - xlag, - xltol, - nlag, - azm, - dip, - atol, - dtol, - bandwh, - isill, + df_loc, + xcol_loc, + ycol_loc, + ndmin, + ndmax, + radius, + ktype, + skmean, + vario, ): - """GSLIB's GAMV program (Deutsch and Journel, 1998) converted from the + """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (Nov, 2019). - Note simplified for 2D, semivariogram only and one direction at a time. + Austin (Jan, 2019). Version for kriging at a set of spatial locations. 
:param df: pandas DataFrame with the spatial data :param xcol: name of the x coordinate column :param ycol: name of the y coordinate column - :param zcol: name of the z coordinate column :param vcol: name of the property column :param tmin: property trimming limit :param tmax: property trimming limit - :param xlag: lag distance - :param xltol: lag distance tolerance - :param nlag: number of lags to calculate - :param azm: azimuth - :param dip: dip - :param atol: azimuth tolerance - :param dtol: dip tolerance - :param bandwh: horizontal bandwidth / maximum distance offset orthogonal to - azimuth - :param isill: 1 for standardize sill - :return: TODO + :param df_loc: pandas DataFrame with the locations to krige + :param xcol: name of the x coordinate column for locations to krige + :param ycol: name of the y coordinate column for locations to krige + :param ndmin: minimum number of data points to use for kriging a block + :param ndmax: maximum number of data points to use for kriging a block + :param radius: maximum isotropic search radius + :param ktype: + :param skmean: + :param vario: + :return: """ - # Load the data - # Trim values outside tmin and tmax - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] - nd = len(df_extract) # TODO: not used + +# Constants + UNEST = -999. + EPSLON = 1.0e-10 + VERSION = 2.907 + first = True + PMX = 9999.0 + MAXSAM = ndmax + 1 + MAXKD = MAXSAM + 1 + MAXKRG = MAXKD * MAXKD + +# load the variogram + nst = vario['nst'] + cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst) + ang = np.zeros(nst); anis = np.zeros(nst) + + c0 = vario['nug']; + cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; + aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + if nst == 2: + cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; + aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; + +# Allocate the needed memory: + xa = np.zeros(MAXSAM) + ya = np.zeros(MAXSAM) + vra = np.zeros(MAXSAM) + dist = np.zeros(MAXSAM) + nums = np.zeros(MAXSAM) + r = np.zeros(MAXKD) + rr = np.zeros(MAXKD) + s = np.zeros(MAXKD) + a = np.zeros(MAXKRG) + klist = np.zeros(len(df_loc)) # list of kriged estimates + vlist = np.zeros(len(df_loc)) + +# Load the data + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + nd = len(df_extract) + ndmax = min(ndmax,nd) x = df_extract[xcol].values y = df_extract[ycol].values - z = df_extract[zcol].values vr = df_extract[vcol].values - # Summary statistics for the data after trimming - avg = vr.mean() # TODO: not used - stdev = vr.std() - sills = stdev ** 2.0 - ssq = sills # TODO: not used - vrmin = vr.min() # TODO: not used - vrmax = vr.max() # TODO: not used +# Load the estimation loactions + nd_loc = len(df_loc) + x_loc = df_loc[xcol].values + y_loc = df_loc[ycol].values + vr_loc = df_loc[vcol].values + +# Make a KDTree for fast search of nearest neighbours + dp = list((y[i], x[i]) for i in range(0,nd)) + data_locs = np.column_stack((y,x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + +# Summary statistics for the data after trimming + avg = vr.mean() + stdev = vr.std() + ss = stdev**2.0 + vrmin = vr.min() + vrmax = vr.max() + +# Initialize accumulators: + cbb = 0.0 + rad2 = radius*radius + +# Calculate Block Covariance. Check for point kriging. 
+ rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) + cov = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) +# Keep this value to use for the unbiasedness constraint: + unbias = cov + cbb = cov + first = False + +# MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: + nk = 0 + ak = 0.0 + vk = 0.0 + + for idata in range(len(df_loc)): + print('Working on location ' + str(idata)) + xloc = x_loc[idata] + yloc = y_loc[idata] + current_node = (yloc,xloc) + +# Find the nearest samples within each octant: First initialize +# the counter arrays: + na = -1 # accounting for 0 as first index + dist.fill(1.0e+20) + nums.fill(-1) + dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + # remove any data outside search radius + na = len(dist) + nums = nums[dist UNEST: + nk = nk + 1 + ak = ak + est + vk = vk + est*est - return dis, vario, npp - -def cova(x, y, z, vr, xlag, xltol, nlag, azm, dip, atol, dtol, bandwh): - """Calculate the variogram by looping over combinatorial of data pairs. - :param x: x values - :param y: y values - :param z: z values - :param vr: property values - :param xlag: lag distance - :param xltol: lag distance tolerance - :param nlag: number of lags to calculate - :param azm: azimuth - :param dip: dip - :param atol: azimuth tolerance - :param dtol: dip tolerance - :param bandwh: horizontal bandwidth / maximum distance offset orthogonal to - azimuth - :return: TODO - """ - # Allocate the needed memory - nvarg = 1 - mxdlv = nlag + 2 # in gamv the npp etc. arrays go to nlag + 2 - dis = np.zeros(mxdlv) - lag = np.zeros(mxdlv) # TODO: not used - vario = np.zeros(mxdlv) - hm = np.zeros(mxdlv) - tm = np.zeros(mxdlv) - hv = np.zeros(mxdlv) # TODO: not used - npp = np.zeros(mxdlv) - #ivtail = np.zeros(nvarg + 2) - #ivhead = np.zeros(nvarg + 2) - #ivtype = np.ones(nvarg + 2) - #ivtail[0] = 0 - #ivhead[0] = 0 - #ivtype[0] = 0 +# END OF MAIN LOOP OVER ALL THE BLOCKS: - EPSLON = 1.0e-20 - nd = len(x) - # The mathematical azimuth is measured counterclockwise from EW and - # not clockwise from NS as the conventional azimuth is - azmuth = (90.0 - azm) * math.pi / 180.0 - dip = (dip) * math.pi / 180.0 - uvxazm = math.cos(azmuth) * math.cos(dip) - uvyazm = math.sin(azmuth) * math.cos(dip) - uvzdip = math.sin(dip) - if atol <= 0.0: - csatol = math.cos(45.0 * math.pi / 180.0) - else: - csatol = math.cos(atol * math.pi / 180.0) - - if dtol <= 0.0: - csdtol = math.cos(30.0 * math.pi / 180.0) - else: - csdtol = math.cos(dtol * math.pi / 180.0) - - # Initialize the arrays for each direction, variogram, and lag - nsiz = nlag + 2 # TODO: not used - dismxs = ((float(nlag) + 0.5 - EPSLON) * xlag) ** 2 + if nk >= 1: + ak = ak / float(nk) + vk = vk/float(nk) - ak*ak + print(' Estimated ' + str(nk) + ' blocks ') + print(' average ' + str(ak) + ' variance ' + str(vk)) - # Main loop over all pairs - for i in range(0, nd): - for j in range(0, nd): - - # Definition of the lag corresponding to the current pair - dx = x[j] - x[i] - dy = y[j] - y[i] - dz = z[j] - z[i] - dxs = dx * dx - dys = dy * dy - dzs = dz * dz - hs = dxs + dys + dzs - if hs <= dismxs: - if hs < 0.0: - hs = 0.0 - h = np.sqrt(hs) + return klist, vlist - # Determine which lag this is and skip if outside the defined - # distance tolerance - if h <= EPSLON: - lagbeg = 0 - lagend = 0 - else: - lagbeg = -1 - lagend = -1 - for ilag in range(1, nlag + 1): - # reduced to -1 - if ( - (xlag * float(ilag - 1) - xltol) - <= h - <= (xlag * float(ilag - 1) + xltol) - ): - if lagbeg < 0: - lagbeg = ilag - lagend = ilag 
- if lagend >= 0: - # Definition of the direction corresponding to the current - # pair. All directions are considered (overlapping of - # direction tolerance cones is allowed) +#Partial Correlation in Python (clone of Matlab's partialcorr) - # Check for an acceptable azimuth angle - dxy = np.sqrt(max((dxs + dys), 0.0)) - dxyz = np.sqrt(max((dxs + dys + dzs), 0.0)) - if dxy < EPSLON: - dcazm = 1.0 - else: - dcazm = (dx * uvxazm + dy * uvyazm) / dxy - if dxyz < EPSLON: - dcdip = 1.0 - else: - dcdip = (dx * uvxazm + dy * uvyazm + dz * uvzdip) / dxyz - - # Check the horizontal bandwidth criteria (maximum deviation - # perpendicular to the specified direction azimuth) - band = np.cross([dx,dy,dz], [uvxazm, uvyazm, uvzdip]) - band = np.sqrt(band.dot(band)) - # Apply all the previous checks at once to avoid a lot of - # nested if statements - if (abs(dcazm) >= csatol) and (abs(dcdip) >= csdtol) and (abs(band) <= bandwh): - # Check whether or not an omni-directional variogram is - # being computed - omni = False - if atol >= 90.0: - omni = True +#This uses the linear regression approach to compute the partial correlation +#(might be slow for a huge number of variables). The algorithm is detailed here: - # For this variogram, sort out which is the tail and - # the head value - # iv = 0 # hardcoded just one variogram - # it = ivtype[iv] # TODO: not used - if dcazm >= 0.0: - vrh = vr[i] - vrt = vr[j] - if omni: - vrtpr = vr[i] - vrhpr = vr[j] - else: - vrh = vr[j] - vrt = vr[i] - if omni: - vrtpr = vr[j] - vrhpr = vr[i] +# http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression - # Reject this pair on the basis of missing values +#Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y}, +#the algorithm can be summarized as +# 1) perform a normal linear least-squares regression with X as the target and Z as the predictor +# 2) calculate the residuals in Step #1 +# 3) perform a normal linear least-squares regression with Y as the target and Z as the predictor +# 4) calculate the residuals in Step #3 +# 5) calculate the correlation coefficient between the residuals from Steps #2 and #4; +# The result is the partial correlation between X and Y while controlling for the effect of Z - # Data was trimmed at the beginning +#Date: Nov 2014 +#Author: Fabian Pedregosa-Izquierdo, f@bianp.net +#Testing: Valentina Borghesani, valentinaborghesani@gmail.com - # The Semivariogram (all other types of measures are - # removed for now) - for il in range(lagbeg, lagend + 1): - npp[il] = npp[il] + 1 - dis[il] = dis[il] + h - tm[il] = tm[il] + vrt - hm[il] = hm[il] + vrh - vario[il] = vario[il] + ((vrh - vrt) * (vrh - vrt)) - if omni: - npp[il] = npp[il] + 1.0 - dis[il] = dis[il] + h - tm[il] = tm[il] + vrtpr - hm[il] = hm[il] + vrhpr - vario[il] = vario[il] + ( - (vrhpr - vrtpr) * (vrhpr - vrtpr) - ) +def partial_corr(C): +# Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling +# for the remaining variables in C. - # Get average values for gam, hm, tm, hv, and tv, then compute the correct - # "variogram" measure - for il in range(0, nlag + 2): - i = il - if npp[i] > 0: - rnum = npp[i] - dis[i] = dis[i] / rnum - vario[i] = vario[i] / rnum - hm[i] = hm[i] / rnum - tm[i] = tm[i] / rnum +# Parameters +# C : array-like, shape (n, p) +# Array with the different variables. 
Each column of C is taken as a variable +# Returns +# P : array-like, shape (p, p) +# P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling +# for the remaining variables in C. - return dis, vario, npp + C = np.asarray(C) + p = C.shape[1] + P_corr = np.zeros((p, p), dtype=np.float) + for i in range(p): + P_corr[i, i] = 1 + for j in range(i+1, p): + idx = np.ones(p, dtype=np.bool) + idx[i] = False + idx[j] = False + beta_i = linalg.lstsq(C[:, idx], C[:, j])[0] + beta_j = linalg.lstsq(C[:, idx], C[:, i])[0] + res_j = C[:, j] - C[:, idx].dot( beta_i) + res_i = C[:, i] - C[:, idx].dot(beta_j) + corr = stats.pearsonr(res_i, res_j)[0] + P_corr[i, j] = corr + P_corr[j, i] = corr + return P_corr + +def semipartial_corr(C): # Michael Pyrcz modified the function above by Fabian Pedregosa-Izquierdo, f@bianp.net for semipartial correlation + C = np.asarray(C) + p = C.shape[1] + P_corr = np.zeros((p, p), dtype=np.float) + for i in range(p): + P_corr[i, i] = 1 + for j in range(i+1, p): + idx = np.ones(p, dtype=np.bool) + idx[i] = False + idx[j] = False + beta_i = linalg.lstsq(C[:, idx], C[:, j])[0] + res_j = C[:, j] - C[:, idx].dot( beta_i) + res_i = C[:, i] # just use the value, not a residual + corr = stats.pearsonr(res_i, res_j)[0] + P_corr[i, j] = corr + P_corr[j, i] = corr + return P_corr From ace4b1a6da7d3eccfe481893c60ad8aceb633193 Mon Sep 17 00:00:00 2001 From: David Fulford Date: Thu, 31 Oct 2019 15:16:11 -0500 Subject: [PATCH 06/10] add parallelization with numba --- .gitignore | 125 ++ geostatspy/geostats.py | 2456 ++++++++++++++++++++++------------------ 2 files changed, 1498 insertions(+), 1083 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f717486 --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 09c2242..02f006d 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -16,6 +16,7 @@ """ import math # for trig functions etc. +from copy import copy # to not change objects passed by ref from bisect import bisect # for maintaining array elements sorted import numpy as np # for ndarrays @@ -26,7 +27,10 @@ from numba.typed import Dict # typing of dictionaries from statsmodels.stats.weightstats import DescrStatsW -def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): +JITKW = dict(nopython=True, cache=True, fastmath=True) + + +def backtr(df, vcol, vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar): """Back transform an entire DataFrame column with a provided transformation table and tail extrapolation. :param df: the source DataFrame :param vcol: the column with the variable to transfrom @@ -41,42 +45,45 @@ def backtr(df,vcol,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :return: TODO """ - EPSLON=1.0e-20 - nd = len(df); nt = len(vr) # number of data to transform and number of data in table + EPSLON = 1.0e-20 + nd = len(df) + nt = len(vr) # number of data to transform and number of data in table backtr = np.zeros(nd) vrgs = df[vcol].values # Value in the lower tail? 1=linear, 2=power, (3 and 4 are invalid): - for id in range(0,nd): + for id in range(0, nd): if vrgs[id] <= vrg[0]: backtr[id] = vr[0] - cdflo = gcum(vrg[0]) - cdfbt = gcum(vrgs[id]) + cdflo = gcum(vrg[0]) + cdfbt = gcum(vrgs[id]) if ltail == 1: - backtr[id] = powint(0.0,cdflo,zmin,vr[0],cdfbt,1.0) + backtr[id] = powint(0.0, cdflo, zmin, vr[0], cdfbt, 1.0) elif ltail == 2: - cpow = 1.0 / ltpar - backtr[id] = powint(0.0,cdflo,zmin,vr[0],cdfbt,cpow) + cpow = 1.0 / ltpar + backtr[id] = powint(0.0, cdflo, zmin, vr[0], cdfbt, cpow) # Value in the upper tail? 1=linear, 2=power, 4=hyperbolic: elif vrgs[id] >= vrg[nt-1]: backtr[id] = vr[nt-1] - cdfhi = gcum(vrg[nt-1]) - cdfbt = gcum(vrgs[id]) + cdfhi = gcum(vrg[nt-1]) + cdfbt = gcum(vrgs[id]) if utail == 1: - backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) + backtr[id] = powint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, 1.0) elif utail == 2: - cpow = 1.0 / utpar - backtr[id] = powint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) + cpow = 1.0 / utpar + backtr[id] = powint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, cpow) elif utail == 4: plambda = (vr[nt-1]**utpar)*(1.0-gcum(vrg[nt-1])) backtr[id] = (plambda/(1.0-gcum(vrgs)))**(1.0/utpar) else: # Value within the transformation table: - j = locate(vrg,1,nt,vrgs[id]) - j = max(min((nt-2),j),1) - backtr[id] = powint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs[id],1.0) + j = locate(vrg, 1, nt, vrgs[id]) + j = max(min((nt-2), j), 1) + backtr[id] = powint(vrg[j], vrg[j+1], vr[j], + vr[j+1], vrgs[id], 1.0) return backtr -def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): + +def backtr_value(vrgs, vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar): """Back transform a single value with a provided transformation table and tail extrapolation. 
:param vrgs: value to transform :param vr: the transformation table, 1D ndarray with the original values @@ -89,38 +96,39 @@ def backtr_value(vrgs,vr,vrg,zmin,zmax,ltail,ltpar,utail,utpar): :param utpar: upper tail extrapolation parameter :return: TODO """ - EPSLON=1.0e-20 - nt = len(vr) # number of data to transform + EPSLON = 1.0e-20 + nt = len(vr) # number of data to transform # Value in the lower tail? 1=linear, 2=power, (3 and 4 are invalid): if vrgs <= vrg[0]: backtr = vr[0] - cdflo = gcum(vrg[0]) - cdfbt = gcum(vrgs) + cdflo = gcum(vrg[0]) + cdfbt = gcum(vrgs) if ltail == 1: - backtr = dpowint(0.0,cdflo,zmin,vr[0],cdfbt,1.0) + backtr = dpowint(0.0, cdflo, zmin, vr[0], cdfbt, 1.0) elif ltail == 2: - cpow = 1.0 / ltpar - backtr = dpowint(0.0,cdflo,zmin,vr[0],cdfbt,cpow) + cpow = 1.0 / ltpar + backtr = dpowint(0.0, cdflo, zmin, vr[0], cdfbt, cpow) # Value in the upper tail? 1=linear, 2=power, 4=hyperbolic: elif vrgs >= vrg[nt-1]: backtr = vr[nt-1] - cdfhi = gcum(vrg[nt-1]) - cdfbt = gcum(vrgs) + cdfhi = gcum(vrg[nt-1]) + cdfbt = gcum(vrgs) if utail == 1: - backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,1.0) + backtr = dpowint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, 1.0) elif utail == 2: - cpow = 1.0 / utpar - backtr = dpowint(cdfhi,1.0,vr[nt-1],zmax,cdfbt,cpow) + cpow = 1.0 / utpar + backtr = dpowint(cdfhi, 1.0, vr[nt-1], zmax, cdfbt, cpow) elif utail == 4: plambda = (vr[nt-1]**utpar)*(1.0-gcum(vrg[nt-1])) backtr = (plambda/(1.0-gcum(vrgs)))**(1.0/utpar) else: -# Value within the transformation table: - j = dlocate(vrg,1,nt,vrgs) - j = max(min((nt-2),j),1) - backtr = dpowint(vrg[j],vrg[j+1],vr[j],vr[j+1],vrgs,1.0) + # Value within the transformation table: + j = dlocate(vrg, 1, nt, vrgs) + j = max(min((nt-2), j), 1) + backtr = dpowint(vrg[j], vrg[j+1], vr[j], vr[j+1], vrgs, 1.0) return backtr + def gcum(x): """Calculate the cumulative probability of the standard normal distribution. :param x: the value from the standard normal distribution @@ -130,19 +138,21 @@ def gcum(x): z = x if z < 0: z = -z - t = 1./(1.+ 0.2316419*z) - gcum = t*(0.31938153 + t*(-0.356563782 + t*(1.781477937 + t*(-1.821255978 + t*1.330274429)))) - e2 = 0. + t = 1./(1. + 0.2316419*z) + gcum = t*(0.31938153 + t*(-0.356563782 + t * + (1.781477937 + t*(-1.821255978 + t*1.330274429)))) + e2 = 0. # 6 standard deviations out gets treated as infinity: if z <= 6.: e2 = np.exp(-z*z/2.)*0.3989422803 - gcum = 1.0- e2 * gcum + gcum = 1.0 - e2 * gcum if x >= 0: return gcum gcum = 1.0 - gcum return gcum -def locate(xx,iis,iie,x): + +def locate(xx, iis, iie, x): """Return value `j` such that `x` is between `xx[j]` and `xx[j+1]`, where `xx` is an array of length `n`, and `x` is a given value. `xx` must be monotonic, either increasing or decreasing (GSLIB version). @@ -176,6 +186,7 @@ def locate(xx,iis,iie,x): j = jl return j + def dlocate(xx, iis, iie, x): """Return value `j` such that `x` is between `xx[j]` and `xx[j+1]`, where `xx` is an array of length `n`, and `x` is a given value. 
`xx` must be @@ -194,7 +205,8 @@ def dlocate(xx, iis, iie, x): j = bisect(array, x) return j -def powint(xlow,xhigh,ylow,yhigh,xval,power): + +def powint(xlow, xhigh, ylow, yhigh, xval, power): """Power-based interpolator :param xlow: x lower interval :param xhigh: x upper interval @@ -204,7 +216,7 @@ def powint(xlow,xhigh,ylow,yhigh,xval,power): :param power: power for interpolation :return: TODO """ - EPSLON=1.0e-20 + EPSLON = 1.0e-20 if (xhigh-xlow) < EPSLON: powint = (yhigh+ylow)/2.0 else: @@ -270,6 +282,7 @@ def dsortem(ib, ie, a, iperm, b=0, c=0, d=0, e=0, f=0, g=0, h=0): h = h_slice[inds] return a, b, c, d, e, f, g, h # TODO: changed from 'a, b, c, d, e, f, h' + def gauinv(p): """Compute the inverse of the standard normal cumulative distribution function. @@ -366,26 +379,31 @@ def dpowint(xlow, xhigh, ylow, yhigh, xval, pwr): ) return dpowint_ -#@jit(nopython=True) # all NumPy array operations included in this function for precompile with NumBa -def setup_rotmat2(c0,nst,it,cc,ang): - DTOR=3.14159265/180.0; EPSLON=0.000000; PI=3.141593 +# @jit(**JITKW) # all NumPy array operations included in this function for precompile with NumBa + + +def setup_rotmat2(c0, nst, it, cc, ang): + DTOR = 3.14159265/180.0 + EPSLON = 0.000000 + PI = 3.141593 # The first time around, re-initialize the cosine matrix for the # variogram structures: - rotmat = np.zeros((4,nst)) + rotmat = np.zeros((4, nst)) maxcov = c0 - for js in range(0,nst): + for js in range(0, nst): azmuth = (90.0-ang[js])*DTOR - rotmat[0,js] = math.cos(azmuth) - rotmat[1,js] = math.sin(azmuth) - rotmat[2,js] = -1*math.sin(azmuth) - rotmat[3,js] = math.cos(azmuth) + rotmat[0, js] = math.cos(azmuth) + rotmat[1, js] = math.sin(azmuth) + rotmat[2, js] = -1*math.sin(azmuth) + rotmat[3, js] = math.cos(azmuth) if it[js] == 4: maxcov = maxcov + 9999.9 else: maxcov = maxcov + cc[js] return rotmat, maxcov -@jit(nopython=True) + +@jit(**JITKW) def setup_rotmat(c0, nst, it, cc, ang, pmx): """Setup rotation matrix. :param c0: nugget constant (isotropic) @@ -416,7 +434,7 @@ def setup_rotmat(c0, nst, it, cc, ang, pmx): return rotmat, maxcov -@jit(nopython=True) +@jit(**JITKW) def cova2(x1, y1, x2, y2, nst, c0, pmx, cc, aa, it, ang, anis, rotmat, maxcov): """Calculate the covariance associated with a variogram model specified by a nugget effect and nested variogram structures. 
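
Note on the decorator change above: setup_rotmat and cova2 are now compiled
with @jit(**JITKW), where JITKW = dict(nopython=True, cache=True,
fastmath=True) is defined once near the imports so every hot function gets
identical numba options. A minimal sketch of the pattern, assuming only that
numba is installed (toy_sqdist is an illustrative name, not a geostatspy
function):

    from numba import jit

    # Shared compilation options: nopython forbids the object-mode fallback,
    # cache=True persists compiled code to disk, and fastmath relaxes strict
    # IEEE 754 semantics for extra speed.
    JITKW = dict(nopython=True, cache=True, fastmath=True)

    @jit(**JITKW)
    def toy_sqdist(x1, y1, x2, y2):
        # Plain scalar arithmetic compiles cleanly in nopython mode.
        dx = x1 - x2
        dy = y1 - y2
        return dx * dx + dy * dy

    toy_sqdist(0.0, 0.0, 3.0, 4.0)  # first call triggers compilation; 25.0

Because fastmath may reorder floating-point operations, results can drift in
the last few bits; a trade-off to keep in mind when comparing output against
the pure NumPy path.
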
@@ -471,18 +489,20 @@ def cova2(x1, y1, x2, y2, nst, c0, pmx, cc, aa, it, ang, anis, rotmat, maxcov): cova2_ = cova2_ + cov1 return cova2_ -def sqdist(x1,y1,z1,x2,y2,z2,ind,rotmat): -# Compute component distance vectors and the squared distance: + +def sqdist(x1, y1, z1, x2, y2, z2, ind, rotmat): + # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 dz = 0.0 sqdist = 0.0 - for i in range(0,2): - cont = rotmat[ind,i,1] * dx + rotmat[ind,i,2] * dy + for i in range(0, 2): + cont = rotmat[ind, i, 1] * dx + rotmat[ind, i, 2] * dy sqdist = sqdist + cont * cont return sqdist -def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): + +def sqdist2(x1, y1, x2, y2, ist, rotmat, anis): """Calculate the 2D square distance based on geometric ani :param x1: x coordinate of first point :param y1: y coordinate of first point @@ -497,65 +517,68 @@ def sqdist2(x1,y1,x2,y2,ist,rotmat,anis): # Compute component distance vectors and the squared distance: dx = x1 - x2 dy = y1 - y2 - dx1 = (dx*rotmat[0,ist] + dy*rotmat[1,ist]) - dy1 = (dx*rotmat[2,ist] + dy*rotmat[3,ist])/anis[ist] + dx1 = (dx*rotmat[0, ist] + dy*rotmat[1, ist]) + dy1 = (dx*rotmat[2, ist] + dy*rotmat[3, ist])/anis[ist] sqdist_ = (dx1*dx1+dy1*dy1) return sqdist_ -def setrot(ang1,ang2,sang1,anis1,anis2,sanis1,nst,MAXROT): + +def setrot(ang1, ang2, sang1, anis1, anis2, sanis1, nst, MAXROT): """GSLIB's SETROT subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. """ - DEG2RAD = 3.141592654/180.0; EPSLON=1.e-20 - rotmat = np.zeros((MAXROT+1,3,3)) + DEG2RAD = 3.141592654/180.0 + EPSLON = 1.e-20 + rotmat = np.zeros((MAXROT+1, 3, 3)) if ang1 >= 0.0 and ang1 < 270.0: - alpha = (90.0 - ang1) * DEG2RAD + alpha = (90.0 - ang1) * DEG2RAD else: - alpha = (450.0 - ang1) * DEG2RAD + alpha = (450.0 - ang1) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac1 = 1.0 / (max(anis1,EPSLON)) - rotmat[0,1,1] = cosa - rotmat[0,1,2] = sina - rotmat[0,2,1] = afac1*(-sina) - rotmat[0,2,2] = afac1*(cosa) + afac1 = 1.0 / (max(anis1, EPSLON)) + rotmat[0, 1, 1] = cosa + rotmat[0, 1, 2] = sina + rotmat[0, 2, 1] = afac1*(-sina) + rotmat[0, 2, 2] = afac1*(cosa) # 2nd structure if present if nst > 1: if ang2 >= 0.0 and ang2 < 270.0: - alpha = (90.0 - ang2) * DEG2RAD + alpha = (90.0 - ang2) * DEG2RAD else: - alpha = (450.0 - ang2) * DEG2RAD + alpha = (450.0 - ang2) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac2 = 1.0 / (max(anis2,EPSLON)) - rotmat[1,1,1] = cosa - rotmat[1,1,2] = sina - rotmat[1,2,1] = afac1*(-sina) - rotmat[1,2,2] = afac1*(cosa) + afac2 = 1.0 / (max(anis2, EPSLON)) + rotmat[1, 1, 1] = cosa + rotmat[1, 1, 2] = sina + rotmat[1, 2, 1] = afac1*(-sina) + rotmat[1, 2, 2] = afac1*(cosa) # search rotation if sang1 >= 0.0 and sang1 < 270.0: - alpha = (90.0 - sang1) * DEG2RAD + alpha = (90.0 - sang1) * DEG2RAD else: - alpha = (450.0 - sang1) * DEG2RAD + alpha = (450.0 - sang1) * DEG2RAD # Get the required sines and cosines: - sina = math.sin(alpha) - cosa = math.cos(alpha) + sina = math.sin(alpha) + cosa = math.cos(alpha) # Construct the rotation matrix in the required memory: - afac1 = 1.0 / 
(max(sanis1,EPSLON)) - rotmat[MAXROT,1,1] = cosa - rotmat[MAXROT,1,2] = sina - rotmat[MAXROT,2,1] = afac1*(-sina) - rotmat[MAXROT,2,2] = afac1*(cosa) + afac1 = 1.0 / (max(sanis1, EPSLON)) + rotmat[MAXROT, 1, 1] = cosa + rotmat[MAXROT, 1, 2] = sina + rotmat[MAXROT, 2, 1] = afac1*(-sina) + rotmat[MAXROT, 2, 2] = afac1*(cosa) # Return to calling program: return rotmat -@jit(nopython=True) + +@jit(**JITKW) def ksol_numpy(neq, a, r): """Find solution of a system of linear equations. :param neq: number of equations @@ -571,7 +594,8 @@ def ksol_numpy(neq, a, r): s = ainv @ r # matrix multiplication return s -def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,global_rotmat,radsqd): + +def ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MAXXYZ, xsiz, ysiz, isrot, nx, ny, nst, c0, cc, aa, it, ang, anis, global_rotmat, radsqd): """GSLIB's CTABLE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). @@ -580,46 +604,48 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa # Declare constants TINY = 1.0e-10 PMX = 9999.9 - MAXROT=2 + MAXROT = 2 # Size of the look-up table: tmp = np.zeros(MAXXYZ) MAXORD = MAXXYZ - if (nx*ny) < MAXCXY: + if (nx*ny) < MAXCXY: MAXORD = MAXCXY order = np.zeros(MAXORD) - nctx = int(min(((MAXCTX-1)/2),(nx-1))) - ncty = int(min(((MAXCTY-1)/2),(ny-1))) + nctx = int(min(((MAXCTX-1)/2), (nx-1))) + ncty = int(min(((MAXCTY-1)/2), (ny-1))) # print('CTable check') # print('nctx ' + str(nctx) + ', ncty ' + str(ncty)) ixnode = np.zeros(MAXXYZ) iynode = np.zeros(MAXXYZ) - covtab = np.zeros((MAXCTX,MAXCTY)) + covtab = np.zeros((MAXCTX, MAXCTY)) # Initialize the covariance subroutine and cbb at the same time: - rotmat, maxcov = setup_rotmat2(c0,nst,it,cc,ang) - cbb = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat2(c0, nst, it, cc, ang) + cbb = cova2(0.0, 0.0, 0.0, 0.0, nst, c0, PMX, cc, + aa, it, ang, anis, rotmat, maxcov) # Now, set up the table and keep track of the node offsets that are # within the search radius: - nlooku = -1 # adjusted for 0 origin - for i in range(-nctx,nctx+1): # cover entire range + nlooku = -1 # adjusted for 0 origin + for i in range(-nctx, nctx+1): # cover entire range xx = i * xsiz ic = nctx + i - for j in range(-ncty,ncty+1): # cover entire range + for j in range(-ncty, ncty+1): # cover entire range yy = j * ysiz jc = ncty + j - covtab[ic,jc] = cova2(0.0,0.0,xx,yy,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + covtab[ic, jc] = cova2( + 0.0, 0.0, xx, yy, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) # print('cov table offset'); print(xx,yy); print(covtab[ic,jc]) - hsqd = sqdist(0.0,0.0,0.0,xx,yy,0.0,MAXROT,global_rotmat) + hsqd = sqdist(0.0, 0.0, 0.0, xx, yy, 0.0, MAXROT, global_rotmat) if hsqd <= radsqd: nlooku = nlooku + 1 # We want to search by closest variogram distance (and use the # anisotropic Euclidean distance to break ties: - tmp[nlooku] = - (covtab[ic,jc] - TINY*hsqd) + tmp[nlooku] = - (covtab[ic, jc] - TINY*hsqd) order[nlooku] = (jc)*MAXCTX+ic # print('populated presort'); print(tmp,order) # Finished setting up the look-up table, now order the nodes such @@ -629,19 +655,20 @@ def ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MAXXYZ,xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa nlooku = nlooku + 1 # print('nlooku' + str(nlooku)); print('MAXCTX' + str(MAXCTX)) - tmp, order = dsortem(0,nlooku,tmp,2,b=order) + tmp, order = dsortem(0, nlooku, tmp, 2, b=order) # 
print('populated postsort'); print(tmp,order) - for il in range(0,nlooku): + for il in range(0, nlooku): loc = int(order[il]) - iy = int((loc-0)/MAXCTX) - ix = loc - (iy-0)*MAXCTX + iy = int((loc-0)/MAXCTX) + ix = loc - (iy-0)*MAXCTX iynode[il] = int(iy) ixnode[il] = int(ix) # print('populated ix, iy node list'); print(ixnode, iynode) - return covtab,tmp,order,ixnode,iynode,nlooku,nctx,ncty + return covtab, tmp, order, ixnode, iynode, nlooku, nctx, ncty + -def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nctx,ncty,UNEST): +def srchnd(ix, iy, nx, ny, xmn, ymn, xsiz, ysiz, sim, noct, nodmax, ixnode, iynode, nlooku, nctx, ncty, UNEST): """GSLIB's SRCHND subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). @@ -649,25 +676,31 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc """ # Consider all the nearby nodes until enough have been found: - ncnode = 0; - icnode = np.zeros(nodmax,dtype=int); icnode.fill(-1) - cnodev = np.zeros(nodmax);cnodex = np.zeros(nodmax); cnodey = np.zeros(nodmax); + ncnode = 0 + icnode = np.zeros(nodmax, dtype=int) + icnode.fill(-1) + cnodev = np.zeros(nodmax) + cnodex = np.zeros(nodmax) + cnodey = np.zeros(nodmax) # print('Node search at '); print(ix,iy) # print('nlooku'); print(nlooku) if noct > 0: ninoct = np.zeros(8) - for il in range(0,nlooku): - if ncnode == nodmax: return ncnode, icnode, cnodev, cnodex, cnodey + for il in range(0, nlooku): + if ncnode == nodmax: + return ncnode, icnode, cnodev, cnodex, cnodey i = ix + (int(ixnode[il])-nctx) j = iy + (int(iynode[il])-ncty) # print('i,j'); print(i,j) - if i < 0 or j < 0: continue - if i >= nx or j >= ny: continue + if i < 0 or j < 0: + continue + if i >= nx or j >= ny: + continue ind = i + (j)*nx if sim[ind] > UNEST: icnode[ncnode] = il - cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin + cnodex[ncnode] = xmn + (i)*xsiz # adjust for 0 origin cnodey[ncnode] = ymn + (j)*ysiz cnodev[ncnode] = sim[ind] # print('srchnd found at index - ' +str(ind) + ' at x and y ' + str(cnodex[ncnode]) + ',' + str(cnodey[ncnode])) @@ -676,16 +709,18 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc ncnode = ncnode + 1 # moved to account for origin 0 return ncnode, icnode, cnodev, cnodex, cnodey -def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval): + +def beyond(ivtype, nccut, ccut, ccdf, ncut, cut, cdf, zmin, zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval): """GSLIB's BEYOND subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. 
""" - EPSLON = 1.0e-20; UNEST=-1.0 + EPSLON = 1.0e-20 + UNEST = -1.0 # Check for both "zval" and "cdfval" defined or undefined: - ierr = 1; + ierr = 1 if zval > UNEST and cdfva > UNEST: return -1 if zval <= UNEST and cdfval <= UNEST: @@ -694,7 +729,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Handle the case of a categorical variable: if ivtype == 0: cum = 0 - for i in range(0,nccut): + for i in range(0, nccut): cum = cum + ccdf[i] if cdfval <= cum: zval = ccut[i] @@ -704,7 +739,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Figure out what part of distribution: ipart = 0 - lower tail # ipart = 1 - middle # ipart = 2 - upper tail - ierr = 0 + ierr = 0 ipart = 1 if zva > UNEST: if zval <= ccut[0]: @@ -720,122 +755,136 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # ARE WE IN THE LOWER TAIL? if ipart == 0: - if ltail ==1: -# Straight Linear Interpolation: + if ltail == 1: + # Straight Linear Interpolation: powr = 1.0 if zval > UNEST: - cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,powr) + cdfval = powint(zmin, ccut[0], 0.0, ccdf[0], zval, powr) else: - zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) + zval = powint(0.0, ccdf[0], zmin, ccut[0], cdfval, powr) elif ltail == 2: -# Power Model interpolation to lower limit "zmin"? - if zval > UNEST: - cdfval = powint(zmin,ccut[0],0.0,ccdf[0],zval,ltpar) - else: - powr = 1.0 / ltpar - zval = powint(0.0,ccdf[0],zmin,ccut[0],cdfval,powr) + # Power Model interpolation to lower limit "zmin"? + if zval > UNEST: + cdfval = powint(zmin, ccut[0], 0.0, ccdf[0], zval, ltpar) + else: + powr = 1.0 / ltpar + zval = powint(0.0, ccdf[0], zmin, ccut[0], cdfval, powr) # Linear interpolation between the rescaled global cdf? elif ltail == 3: if zval > UNEST: -# Computing the cdf value. Locate the point and the class bound: - idat = locate(cut,1,ncut,zval) - iupp = locate(cut,ncut,1,ncut,ccut[0]) + # Computing the cdf value. Locate the point and the class bound: + idat = locate(cut, 1, ncut, zval) + iupp = locate(cut, ncut, 1, ncut, ccut[0]) # Straight linear interpolation if no data; otherwise, linear: - if idat <= -1 or idat >= ncut -1 or iupp <= -1 or iupp >= ncut-1: # modfity for 0 index - cdfval = powint(zmin,cut[0],0.0,cdf[0],zval,1.) + if idat <= -1 or idat >= ncut - 1 or iupp <= -1 or iupp >= ncut-1: # modfity for 0 index + cdfval = powint(zmin, cut[0], 0.0, cdf[0], zval, 1.) else: - temp = powint(cut[idat],cut[idat+1],cdf[idat],cdf[idat+1],zval,1.) + temp = powint(cut[idat], cut[idat+1], + cdf[idat], cdf[idat+1], zval, 1.) cdfval = temp*ccdf[0]/cdf[iupp] else: -# Computing Z value: Are there any data out in the tail? + # Computing Z value: Are there any data out in the tail? - iupp = locate(cut,ncut,1,ncut,ccut[0]) + iupp = locate(cut, ncut, 1, ncut, ccut[0]) # Straight linear interpolation if no data; otherwise, local linear # interpolation: if iupp <= 0 or iupp >= ncut: - zval = powint(0.0,cdf[0],zmin,cut[0],cdfval,1.) + zval = powint(0.0, cdf[0], zmin, cut[0], cdfval, 1.) else: temp = cdfval*cdf[iupp]/ccdf[1] - idat = locate(cdf,ncut,1,ncut,temp) + idat = locate(cdf, ncut, 1, ncut, temp) if idat <= -1 or idat >= ncut-1: # adjusted for 0 origin - zval = powint(0.0,cdf[0],zmin,cut[0],cdfval,1.) + zval = powint(0.0, cdf[0], zmin, cut[0], cdfval, 1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[dat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[dat], cut[idat+1], temp, 1.) 
else:
-# Error situation - unacceptable option:
-            ierr = 2
-            return -1
+            # Error situation - unacceptable option:
+            ierr = 2
+            return -1

 # FINISHED THE LOWER TAIL, ARE WE IN THE MIDDLE?
     if ipart == 1:

-# Establish the lower and upper limits:
+        # Establish the lower and upper limits:
         if zval > UNEST:
-            cclow = locate(ccut,1,nccut,zval)
+            cclow = locate(ccut, 1, nccut, zval)
         else:
-            cclow = locate(ccdf,1,nccut,cdfval)
+            cclow = locate(ccdf, 1, nccut, cdfval)
         cchigh = cclow + 1
         if middle == 1:

-# Straight Linear Interpolation:
+            # Straight Linear Interpolation:
             powr = 1.0
             if zval > UNEST:
-                cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,powr)
+                cdfval = powint(ccut[cclow], ccut[cchigh],
+                                ccdf[cclow], ccdf[cchigh], zval, powr)
             else:
-                zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr)
+                zval = powint(ccdf[cclow], ccdf[cchigh],
+                              ccut[cclow], ccut[cchigh], cdfval, powr)

 # Power interpolation between class bounds?
         elif middle == 2:
-            if zval > UNEST:
-                cdfval = powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,mpar)
-            else:
-                powr = 1.0 / mpar
-                zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,powr)
+            if zval > UNEST:
+                cdfval = powint(ccut[cclow], ccut[cchigh],
+                                ccdf[cclow], ccdf[cchigh], zval, mpar)
+            else:
+                powr = 1.0 / mpar
+                zval = powint(ccdf[cclow], ccdf[cchigh],
+                              ccut[cclow], ccut[cchigh], cdfval, powr)

 # Linear interpolation between the rescaled global cdf?
         elif middle == 3:
-            ilow = locate(cut,ncut,1,ncut,ccut[cclow])
-            iupp = locate(cut,ncut,1,ncut,ccut[cchigh])
+            ilow = locate(cut, ncut, 1, ncut, ccut[cclow])
+            iupp = locate(cut, ncut, 1, ncut, ccut[cchigh])
             if cut[ilow] < ccut[cclow]:
                 ilow = ilow + 1
-            if cut[iupp] > ccut[cchigh]:
+            if cut[iupp] > ccut[cchigh]:
                 iupp = iupp - 1
             if zval > UNEST:
-                idat = locate(cut,1,ncut,zval)
+                idat = locate(cut, 1, ncut, zval)

 # Straight linear interpolation if no data; otherwise, local linear
 # interpolation:
                 if idat <= -1 or idat >= ncut-1 or ilow <= -1 or ilow >= ncut-1 or iupp <= -1 or iupp >= ncut-1 or iupp <= ilow:
-                    cdfval=powint(ccut[cclow],ccut[cchigh],ccdf[cclow],ccdf[cchigh],zval,1.)
+                    cdfval = powint(ccut[cclow], ccut[cchigh],
+                                    ccdf[cclow], ccdf[cchigh], zval, 1.)
                 else:
-                    temp = powint(cut[idat],cut[idat+1],cdf[idat],cdf[idat+1],zval,1.)
-                    cdfval=powint(cdf[ilow],cdf[iupp],ccdf[cclow],ccdf[cchigh],temp,1.)
+                    temp = powint(cut[idat], cut[idat+1],
+                                  cdf[idat], cdf[idat+1], zval, 1.)
+                    cdfval = powint(cdf[ilow], cdf[iupp],
+                                    ccdf[cclow], ccdf[cchigh], temp, 1.)
             else:

-# Straight linear interpolation if no data; otherwise, local linear
-# interpolation:
+                # Straight linear interpolation if no data; otherwise, local linear
+                # interpolation:
                 if ilow <= -1 or ilow >= ncut-1 or iupp <= -1 or iupp >= ncut-1 or iupp < ilow:
-                    zval=powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.)
+                    zval = powint(ccdf[cclow], ccdf[cchigh],
+                                  ccut[cclow], ccut[cchigh], cdfval, 1.)
                 else:
-                    temp=powint(ccdf[cclow],ccdf[cchigh],cdf[ilow],cdf[iupp],cdfval,1.)
-                    idat = locate(cdf,1,ncut,temp)
+                    temp = powint(ccdf[cclow], ccdf[cchigh],
+                                  cdf[ilow], cdf[iupp], cdfval, 1.)
+                    idat = locate(cdf, 1, ncut, temp)
                     if cut[idat] < ccut[cclow]:
-                        idat=idat+1
+                        idat = idat+1
                     if idat <= -1 or idat >= ncut-1 or cut[idat+1] > ccut[cchigh]:
-                        zval = powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.)
+                        zval = powint(ccdf[cclow], ccdf[cchigh],
+                                      ccut[cclow], ccut[cchigh], cdfval, 1.)
                     else:
-                        zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.)
- zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) else: -# Error situation - unacceptable option: + # Error situation - unacceptable option: ierr = 2 return -1 @@ -844,25 +893,29 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar if utail == 1: powr = 1.0 if zval > UNEST: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,powr) + cdfval = powint(ccut(nccut), zmax, + ccdf(nccut), 1.0, zval, powr) else: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, powr) elif utail == 2: -# Power interpolation to upper limit "utpar"? + # Power interpolation to upper limit "utpar"? if zval > UNEST: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,utpar) + cdfval = powint(ccut(nccut), zmax, ccdf( + nccut), 1.0, zval, utpar) else: powr = 1.0 / utpar - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,powr) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, powr) # Linear interpolation between the rescaled global cdf? elif utail == 3: if zval > UNEST: -# Approximately Locate the point and the class bound: - idat = locate(cut,1,ncut,zval,idat) - ilow = locate(cut,1,ncut,ccut(nccut),ilow) + # Approximately Locate the point and the class bound: + idat = locate(cut, 1, ncut, zval, idat) + ilow = locate(cut, 1, ncut, ccut(nccut), ilow) if cut[idat] < zval: idat = idat + 1 if cut[ilow] < ccut[nccut-1]: @@ -871,48 +924,52 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Straight linear interpolation if no data; otherwise, local linear # interpolation: if idat < -1 or idat >= ncut-1 or ilow <= -1 or ilow >= ncut-1: - cdfval = powint(ccut(nccut),zmax,ccdf(nccut),1.0,zval,1.) + cdfval = powint(ccut(nccut), zmax, + ccdf(nccut), 1.0, zval, 1.) else: - temp = powint(cut(idat),cut(idat+1),cdf(idat),cdf(idat+1),zval,1.) - cdfval = powint(cdf(ilow),1.0,ccdf(nccut),1.0,temp,1.) + temp = powint(cut(idat), cut(idat+1), + cdf(idat), cdf(idat+1), zval, 1.) + cdfval = powint(cdf(ilow), 1.0, ccdf(nccut), 1.0, temp, 1.) else: -# Computing Z value: Are there any data out in the tail? - ilow = locate(cut,ncut,1,ncut,ccut(nccut),ilow) + # Computing Z value: Are there any data out in the tail? + ilow = locate(cut, ncut, 1, ncut, ccut(nccut), ilow) if cut[ilow] < ccut[nccut-1]: ilow = ilow + 1 # Straight linear interpolation if no data; otherwise, local linear # interpolation: if ilow <= -1 or ilow >= ncut-1: - zval = powint(ccdf(nccut),1.0,ccut(nccut),zmax,cdfval,1.) + zval = powint(ccdf(nccut), 1.0, ccut( + nccut), zmax, cdfval, 1.) else: - temp = powint(ccdf(nccut),1.0,cdf(ilow),1.0,cdfval,1.) - idat = locate(cdf,ncut,1,ncut,temp) + temp = powint(ccdf(nccut), 1.0, cdf(ilow), 1.0, cdfval, 1.) + idat = locate(cdf, ncut, 1, ncut, temp) if cut[idat] < ccut[nccut-1]: - idat=idat+1 + idat = idat+1 if idat >= ncut-1: - zval = powint(ccdf[nccut-1],1.0,ccut[nccut-1],zmax,cdfval,1.) + zval = powint(ccdf[nccut-1], 1.0, + ccut[nccut-1], zmax, cdfval, 1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) + zval = powint(cdf[idat], cdf[idat+1], + cut[idat], cut[idat+1], temp, 1.) # Fit a Hyperbolic Distribution? 
elif utail == 4: -# Figure out "lambda" and required info: - lambd = math.pow(ccut[nccut],utpar)*(1.0-ccdf[nccut-1]) + # Figure out "lambda" and required info: + lambd = math.pow(ccut[nccut], utpar)*(1.0-ccdf[nccut-1]) if zval > UNEST: - cdfval = 1.0 - (lambd/(math.pow(zval,utpar))) + cdfval = 1.0 - (lambd/(math.pow(zval, utpar))) else: - zval = (lambd/math.pow((1.0-cdfval),(1.0/utpar))) + zval = (lambd/math.pow((1.0-cdfval), (1.0/utpar))) else: -# Error situation - unacceptable option: + # Error situation - unacceptable option: ierr = 2 return -1 - if zval < zmin: zval = zmin if zval > zmax: @@ -922,14 +979,15 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar return zval -def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,ncty,icnode,ixnode,iynode,cnodev,cnodex,cnodey,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov,MAXCTX,MAXCTY,MAXKR1,MAXKR2): + +def krige(ix, iy, nx, ny, xx, yy, lktype, x, y, vr, sec, colocorr, lvm, close, covtab, nctx, ncty, icnode, ixnode, iynode, cnodev, cnodex, cnodey, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov, MAXCTX, MAXCTY, MAXKR1, MAXKR2): """GSLIB's KRIGE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. """ EPSLON = 1.0e-20 - cur_index = ix + (iy)*nx + cur_index = ix + (iy)*nx # print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) @@ -940,9 +998,14 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) - vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) - r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + vra = np.zeros(MAXKR1) + vrea = np.zeros(MAXKR1) + r = np.zeros(MAXKR1) + rr = np.zeros(MAXKR1) + s = np.zeros(MAXKR1) + a = np.zeros(MAXKR2) + cbb = cova2(0, 0, 0, 0, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print(r.shape) # Local mean if lktype == 2: @@ -952,84 +1015,91 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # Size of the kriging system: first = False - na = nclose + ncnode + na = nclose + ncnode # print('lktype' + str(lktype)) - if lktype == 0: neq = na + if lktype == 0: + neq = na if lktype == 1: -# print('ordinary kriging') + # print('ordinary kriging') + neq = na + 1 + if lktype == 2: + neq = na + if lktype == 3: + neq = na + 2 + if lktype == 4: neq = na + 1 - if lktype == 2: neq = na - if lktype == 3: neq = na + 2 - if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) # Set up kriging matrices: - iin=-1 # acocunting for 0 origin + iin = -1 # acocunting for 0 origin # print('krige na' + str(na)) - for j in range(0,na): + for j in range(0, na): -# Sort out the actual location of point "j" - if j < nclose: # adjusted for 0 index origin - index = int(close[j]) - x1 = x[index] - y1 = y[index] + # Sort out the actual location of point "j" + if j < nclose: # adjusted for 0 index origin + index = int(close[j]) + x1 = x[index] + y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) if sec.shape[0] > 1: - vrea[j]= sec[index]; + vrea[j] = sec[index] else: - 
vrea[j] = 0.0 # added this - no effect - if lktype == 2: vra[j] = vra[j] - vrea[j] + vrea[j] = 0.0 # added this - no effect + if lktype == 2: + vra[j] = vra[j] - vrea[j] else: -# It is a previously simulated node (keep index for table look-up): -# print(j) - index = j-(nclose) # adjust for 0 index - x1 = cnodex[index] - y1 = cnodey[index] + # It is a previously simulated node (keep index for table look-up): + # print(j) + index = j-(nclose) # adjust for 0 index + x1 = cnodex[index] + y1 = cnodey[index] vra[j] = cnodev[index] - ind = icnode[index] + ind = icnode[index] # print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - ix1 = ix + (int(ixnode[ind])-nctx-1) - iy1 = iy + (int(iynode[ind])-ncty-1) + ix1 = ix + (int(ixnode[ind])-nctx-1) + iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) - index = ix1 + (iy1-1)*nx + index = ix1 + (iy1-1)*nx if lktype == 2: - vrea[j]= lvm[index] + vrea[j] = lvm[index] vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix -# print('kriging indice populated' + str(j) + ',' + str(i)) -# Sort out the actual location of point "i" + for i in range(0, na): # we need the full matrix + # print('kriging indice populated' + str(j) + ',' + str(i)) + # Sort out the actual location of point "i" if i < nclose: - index = int(close[i]) # adjust for 0 index - x2 = x[index] - y2 = y[index] + index = int(close[i]) # adjust for 0 index + x2 = x[index] + y2 = y[index] else: -# It is a previously simulated node (keep index for table look-up): + # It is a previously simulated node (keep index for table look-up): #print('i = ' + str(i) + ',nclose = ' + str(nclose) + ', na = ' + str(na)) - index = i-(nclose) - x2 = cnodex[index] - y2 = cnodey[index] - ind = icnode[index] + index = i-(nclose) + x2 = cnodex[index] + y2 = cnodey[index] + ind = icnode[index] # print('previous node index' + str(ind)) - ix2 = ix + (int(ixnode[ind])-nctx-1) - iy2 = iy + (int(iynode[ind])-ncty-1) + ix2 = ix + (int(ixnode[ind])-nctx-1) + iy2 = iy + (int(iynode[ind])-ncty-1) # Now, get the covariance value: iin = iin + 1 # print('kriging data location = '); print(x2,y2) # Decide whether or not to use the covariance look-up table: if j <= nclose or i <= nclose: - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) a[iin] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix1 - ix2) -# jj = ncty + 1 + (iy1 - iy2) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix1 - ix2) + # jj = ncty + 1 + (iy1 - iy2) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: @@ -1039,8 +1109,9 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # Get the RHS value (possibly with covariance look-up table): if j <= nclose: -# print(cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print(cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) @@ -1048,19 
+1119,20 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct r[j] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix - ix1) -# jj = ncty + 1 + (iy - iy1) - -# print('RHS ctable coord' + str(ii) + ',' + str(jj)) -# print('ix,iy ='); print(ix,iy) -# print('ix1,iy1'); print(ix1,iy1) -# if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 -# print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) -# else: -# cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix - ix1) + # jj = ncty + 1 + (iy - iy1) + + # print('RHS ctable coord' + str(ii) + ',' + str(jj)) + # print('ix,iy ='); print(ix,iy) + # print('ix1,iy1'); print(ix1,iy1) + # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 + # print('Not using covariance table') + # cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # else: + # cov = covtab[ii,jj] + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) @@ -1069,54 +1141,59 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] - if lktype == 1: # we need the full array + if lktype == 1: # we need the full array iin = iin + 1 a[iin] = 1.0 - if lktype == 4: # we need the full array + if lktype == 4: # we need the full array iin = iin + 1 a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = 1.0 - iin = iin + 1 - a[iin] = 0.0 - r[na] = 1.0 + iin = iin + 1 + a[iin] = 0.0 + r[na] = 1.0 rr[na] = 1.0 # Addition of the External Drift Constraint: if lktype == 3: - edmin = 999999. + edmin = 999999. edmax = -999999. 
- for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = vrea(i) - if a[iin] edmax: edmax = a[iin] - iin = iin + 1 - a[iin] = 0.0 - iin = iin + 1 - a[iin] = 0.0 - ind = ix + (iy-1)*nx - r[na+1] = lvm[ind] + if a[iin] < edmin: + edmin = a[iin] + if a[iin] > edmax: + edmax = a[iin] + iin = iin + 1 + a[iin] = 0.0 + iin = iin + 1 + a[iin] = 0.0 + ind = ix + (iy-1)*nx + r[na+1] = lvm[ind] rr[na+1] = r[na+1] - if (edmax-edmin) < EPSLON: neq = neq - 1 + if (edmax-edmin) < EPSLON: + neq = neq - 1 # Addition of Collocated Cosimulation Constraint: if lktype == 4: colc = True - sfmin = 1.0e21 + sfmin = 1.0e21 sfmax = -1.0e21 - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = colocorr*r[i] - if a[iin] < sfmin: sfmin = a[iin] - if a[iin] > sfmax: sfmax = a[iin] - iin = iin + 1 + if a[iin] < sfmin: + sfmin = a[iin] + if a[iin] > sfmax: + sfmax = a[iin] + iin = iin + 1 a[iin] = 1.0 - ii = na - r[ii] = colocorr + ii = na + r[ii] = colocorr rr[ii] = r[ii] # if (sfmax-sfmin) < EPSLON: # neq = neq - 1 @@ -1128,18 +1205,18 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # print('r'); print(r) # print('data'); print(vra) if neq == 1 and lktype != 3: -# print('neq = 1 '); print(a,r) - s[0] = r[0] / a[0] + # print('neq = 1 '); print(a,r) + s[0] = r[0] / a[0] else: -# print('neq prior ksol' + str(neq)) - s = ksol_numpy(neq,a,r) + # print('neq prior ksol' + str(neq)) + s = ksol_numpy(neq, a, r) # print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) # print('r'); print(r) - ising = 0 # need to figure this out + ising = 0 # need to figure this out # print('s'); print(s) # Compute the estimate and kriging variance. 
Recall that kriging type @@ -1150,44 +1227,46 @@ def krige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,lvm,close,covtab,nctx,nct # 4 = Collocated Cosimulation: # print('kriging weights'); print(s) - cmean = 0.0 + cmean = 0.0 # print('cbb = ' + str(cbb)) cstdev = cbb sumwts = 0.0 - for i in range(0,na): - cmean = cmean + s[i]*vra[i] + for i in range(0, na): + cmean = cmean + s[i]*vra[i] cstdev = cstdev - s[i]*rr[i] sumwts = sumwts + s[i] if lktype == 1: cstdev = cstdev - s[na] # print('Ordinary Weight' + str(s[na])) - if lktype == 2: cmean = cmean + gmean - if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion - ind = ix + (iy-1)*nx + if lktype == 2: + cmean = cmean + gmean + if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion + ind = ix + (iy-1)*nx # print(ind) # print('neq'); print(neq) # print('s'); print(s.shape) # print('lvm'); print(lvm.shape) # print('colc wt = ' + str(s[na]) + ' for ' + str(lvm[cur_index]) + ' at index ' + str(cur_index)) - cmean = cmean + s[na]*lvm[cur_index] - cstdev = cstdev - s[na] *rr[na] + cmean = cmean + s[na]*lvm[cur_index] + cstdev = cstdev - s[na] * rr[na] # Error message if negative variance: if cstdev < 0.0: -# print('ERROR: Negative Variance: ' + str(cstdev)) + # print('ERROR: Negative Variance: ' + str(cstdev)) cstdev = 0.0 - cstdev = math.sqrt(max(cstdev,0.0)) + cstdev = math.sqrt(max(cstdev, 0.0)) # print('kriging estimate and variance' + str(cmean) + ', ' + str(cstdev)) return cmean, cstdev -def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,nctx,ncty,icnode,ixnode,iynode,cnodev,cnodex,cnodey,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov,MAXCTX,MAXCTY,MAXKR1,MAXKR2): + +def ikrige(ix, iy, nx, ny, xx, yy, lktype, x, y, vr, sec, colocorr, gmean, lvm, close, covtab, nctx, ncty, icnode, ixnode, iynode, cnodev, cnodex, cnodey, nst, c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov, MAXCTX, MAXCTY, MAXKR1, MAXKR2): """GSLIB's KRIGE subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python and modified for indicator kriging by Michael Pyrcz, the University of Texas at Austin (March, 2019). Note this was simplified to 2D only. 
WARNING: tested only for ktype 0,1,2 (2 is local proportion model / local mean provided, not residual approach) """ EPSLON = 1.0e-20 - cur_index = ix + (iy)*nx + cur_index = ix + (iy)*nx # print('krige at grid '); print(ix,iy) # print('krige at node '); print(xx,yy) # print('grid index = '); print(cur_index) @@ -1198,9 +1277,14 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # print('kriging') # print('nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('MAXKR1'); print(MAXKR1) - vra = np.zeros(MAXKR1); vrea = np.zeros(MAXKR1) - r = np.zeros(MAXKR1); rr = np.zeros(MAXKR1); s = np.zeros(MAXKR1); a = np.zeros(MAXKR2) - cbb = cova2(0,0,0,0,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + vra = np.zeros(MAXKR1) + vrea = np.zeros(MAXKR1) + r = np.zeros(MAXKR1) + rr = np.zeros(MAXKR1) + s = np.zeros(MAXKR1) + a = np.zeros(MAXKR2) + cbb = cova2(0, 0, 0, 0, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print(r.shape) # Local mean # just pass the local probability as gmean # if lktype == 2: @@ -1210,89 +1294,95 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # Size of the kriging system: first = False - na = nclose + ncnode + na = nclose + ncnode # print('lktype' + str(lktype)) - if lktype == 0: neq = na + if lktype == 0: + neq = na if lktype == 1: -# print('ordinary kriging') + # print('ordinary kriging') + neq = na + 1 + if lktype == 2: + neq = na + if lktype == 3: + neq = na + 2 + if lktype == 4: neq = na + 1 - if lktype == 2: neq = na - if lktype == 3: neq = na + 2 - if lktype == 4: neq = na + 1 # print('prior matrix build neq'); print(neq) # print('na'); print(na) # print('kriging data close'); print(close) # print('kriging node close'); print(icnode) # Set up kriging matrices: - iin=-1 # acocunting for 0 origin + iin = -1 # acocunting for 0 origin # print('krige na' + str(na)) - for j in range(0,na): + for j in range(0, na): -# Sort out the actual location of point "j" - if j < nclose: # adjusted for 0 index origin - index = int(close[j]) - x1 = x[index] - y1 = y[index] + # Sort out the actual location of point "j" + if j < nclose: # adjusted for 0 index origin + index = int(close[j]) + x1 = x[index] + y1 = y[index] vra[j] = vr[index] # print('data: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) # if lvm.shape[0] > 1: # vrea[j]= sec[index]; # else: - vrea[j] = 0.0 # added this - no effect + vrea[j] = 0.0 # added this - no effect # if lktype == 2: vra[j] = vra[j] - vrea[j] # just using local variable mean not full residual approach else: -# It is a previously simulated node (keep index for table look-up): -# print(j) - index = j-(nclose) # adjust for 0 index - x1 = cnodex[index] - y1 = cnodey[index] + # It is a previously simulated node (keep index for table look-up): + # print(j) + index = j-(nclose) # adjust for 0 index + x1 = cnodex[index] + y1 = cnodey[index] vra[j] = cnodev[index] - ind = icnode[index] + ind = icnode[index] # print('prev node: index = ' + str(index) + ', x,y ' + str(x1) + ',' + str(y1) + ', value = ' + str(vra[j])) - ix1 = ix + (int(ixnode[ind])-nctx-1) - iy1 = iy + (int(iynode[ind])-ncty-1) + ix1 = ix + (int(ixnode[ind])-nctx-1) + iy1 = iy + (int(iynode[ind])-ncty-1) # print('ix1, iy1 = '); print(ix1,iy1) - index = ix1 + (iy1-1)*nx + index = ix1 + (iy1-1)*nx # if lktype == 2: # vrea[j]= lvm[index] # vra[j] = vra[j] - vrea[j] - for i in range(0,na): # we need the full matrix -# print('kriging indice populated' + str(j) + ',' + str(i)) -# 
Sort out the actual location of point "i" + for i in range(0, na): # we need the full matrix + # print('kriging indice populated' + str(j) + ',' + str(i)) + # Sort out the actual location of point "i" if i < nclose: - index = int(close[i]) # adjust for 0 index - x2 = x[index] - y2 = y[index] + index = int(close[i]) # adjust for 0 index + x2 = x[index] + y2 = y[index] else: -# It is a previously simulated node (keep index for table look-up): + # It is a previously simulated node (keep index for table look-up): #print('i = ' + str(i) + ',nclose = ' + str(nclose) + ', na = ' + str(na)) - index = i-(nclose) - x2 = cnodex[index] - y2 = cnodey[index] - ind = icnode[index] + index = i-(nclose) + x2 = cnodex[index] + y2 = cnodey[index] + ind = icnode[index] # print('previous node index' + str(ind)) - ix2 = ix + (int(ixnode[ind])-nctx-1) - iy2 = iy + (int(iynode[ind])-ncty-1) + ix2 = ix + (int(ixnode[ind])-nctx-1) + iy2 = iy + (int(iynode[ind])-ncty-1) # Now, get the covariance value: iin = iin + 1 # print('kriging data location = '); print(x2,y2) # Decide whether or not to use the covariance look-up table: if j <= nclose or i <= nclose: -# print('x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov') -# print(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print('x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov') + # print(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # print('cov'); print(cov) a[iin] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix1 - ix2) -# jj = ncty + 1 + (iy1 - iy2) - cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix1 - ix2) + # jj = ncty + 1 + (iy1 - iy2) + cov = cova2(x1, y1, x2, y2, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # cov = cova2(x1,y1,x2,y2,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) # else: @@ -1302,8 +1392,9 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # Get the RHS value (possibly with covariance look-up table): if j <= nclose: -# print(cc,aa,it,ang,anis,rotmat,maxcov) - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # print(cc,aa,it,ang,anis,rotmat,maxcov) + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for data ') # print('ix,iy ='); print(xx,xx) @@ -1311,19 +1402,20 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n r[j] = cov else: -# Try to use the covariance look-up (if the distance is in range): -# ii = nctx + 1 + (ix - ix1) -# jj = ncty + 1 + (iy - iy1) - -# print('RHS ctable coord' + str(ii) + ',' + str(jj)) -# print('ix,iy ='); print(ix,iy) -# print('ix1,iy1'); print(ix1,iy1) -# if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 -# print('Not using covariance table') -# cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) -# else: -# cov = covtab[ii,jj] - cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # Try to use the covariance look-up (if the distance is in range): + # ii = nctx + 1 + (ix - ix1) + # jj = ncty + 1 + (iy - iy1) + + # print('RHS ctable coord' + str(ii) + 
',' + str(jj)) + # print('ix,iy ='); print(ix,iy) + # print('ix1,iy1'); print(ix1,iy1) + # if ii < 0 or ii >= MAXCTX or jj < 0 or jj >= MAXCTY: # adjusted for origin 0 + # print('Not using covariance table') + # cov = cova2(xx,yy,x1,y1,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov) + # else: + # cov = covtab[ii,jj] + cov = cova2(xx, yy, x1, y1, nst, c0, 9999.9, cc, + aa, it, ang, anis, rotmat, maxcov) # if cov >= 1.0: # print('cov of 1.0 RHS for node ' + str(j)) # print('ix,iy ='); print(xx,xx) @@ -1332,54 +1424,59 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n r[j] = cov # print('kriging, writing RHS '+ str(j) + ',' + str(cov) + 'loc_est' + str(xx) + ',' + str(yy) + 'data' + str(x1) + ',' + str(y1)) rr[j] = r[j] - if lktype == 1: # we need the full array + if lktype == 1: # we need the full array iin = iin + 1 a[iin] = 1.0 - if lktype == 4: # we need the full array + if lktype == 4: # we need the full array iin = iin + 1 a[iin] = colocorr*r[j] # Addition of OK constraint: if lktype == 1 or lktype == 3: - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = 1.0 - iin = iin + 1 - a[iin] = 0.0 - r[na] = 1.0 + iin = iin + 1 + a[iin] = 0.0 + r[na] = 1.0 rr[na] = 1.0 # Addition of the External Drift Constraint: if lktype == 3: - edmin = 999999. + edmin = 999999. edmax = -999999. - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = vrea(i) - if a[iin] edmax: edmax = a[iin] - iin = iin + 1 - a[iin] = 0.0 - iin = iin + 1 - a[iin] = 0.0 - ind = ix + (iy-1)*nx - r[na+1] = lvm[ind] + if a[iin] < edmin: + edmin = a[iin] + if a[iin] > edmax: + edmax = a[iin] + iin = iin + 1 + a[iin] = 0.0 + iin = iin + 1 + a[iin] = 0.0 + ind = ix + (iy-1)*nx + r[na+1] = lvm[ind] rr[na+1] = r[na+1] - if (edmax-edmin) < EPSLON: neq = neq - 1 + if (edmax-edmin) < EPSLON: + neq = neq - 1 # Addition of Collocated Cosimulation Constraint: if lktype == 4: colc = True - sfmin = 1.0e21 + sfmin = 1.0e21 sfmax = -1.0e21 - for i in range(0,na): - iin = iin + 1 + for i in range(0, na): + iin = iin + 1 a[iin] = colocorr*r[i] - if a[iin] < sfmin: sfmin = a[iin] - if a[iin] > sfmax: sfmax = a[iin] - iin = iin + 1 + if a[iin] < sfmin: + sfmin = a[iin] + if a[iin] > sfmax: + sfmax = a[iin] + iin = iin + 1 a[iin] = 1.0 - ii = na - r[ii] = colocorr + ii = na + r[ii] = colocorr rr[ii] = r[ii] # if (sfmax-sfmin) < EPSLON: # neq = neq - 1 @@ -1391,18 +1488,18 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n # print('r'); print(r) # print('data'); print(vra) if neq == 1 and lktype != 3: -# print('neq = 1 '); print(a,r) - s[0] = r[0] / a[0] + # print('neq = 1 '); print(a,r) + s[0] = r[0] / a[0] else: -# print('neq prior ksol' + str(neq)) - s = ksol_numpy(neq,a,r) + # print('neq prior ksol' + str(neq)) + s = ksol_numpy(neq, a, r) # print('neq post ksol' + str(neq)) # if s.shape[0]< neq: # print('s shape'); print(s.shape) # print('a'); print(a) # print('r'); print(r) - ising = 0 # need to figure this out + ising = 0 # need to figure this out # print('s'); print(s) # Compute the estimate and kriging variance. 
Recall that kriging type
@@ -1413,43 +1510,45 @@ def ikrige(ix,iy,nx,ny,xx,yy,lktype,x,y,vr,sec,colocorr,gmean,lvm,close,covtab,n
 #          4 = Collocated Cosimulation:

 #    print('kriging weights'); print(s)
-    cmean  = 0.0
+    cmean = 0.0
 #    print('cbb = ' + str(cbb))
     cstdev = cbb
     sumwts = 0.0
-    for i in range(0,na):
-        cmean  = cmean  + s[i]*vra[i]
+    for i in range(0, na):
+        cmean = cmean + s[i]*vra[i]
         cstdev = cstdev - s[i]*rr[i]
         sumwts = sumwts + s[i]
     if lktype == 1:
         cstdev = cstdev - s[na]
 #        print('Ordinary Weight' + str(s[na]))
 #    if lktype == 2: cmean  = cmean + gmean
-    if lktype == 4 and colc == True: # we may drop colocated if low covariance dispersion
-        ind = ix + (iy-1)*nx
+    if lktype == 4 and colc == True:  # we may drop colocated if low covariance dispersion
+        ind = ix + (iy-1)*nx
 #        print(ind)
 #        print('neq'); print(neq)
 #        print('s'); print(s.shape)
 #        print('lvm'); print(lvm.shape)
 #        print('colc wt = ' + str(s[na]) + ' for ' + str(lvm[cur_index]) + ' at index ' + str(cur_index))
-        cmean  = cmean  + s[na]*lvm[cur_index]
-        cstdev = cstdev - s[na] *rr[na]
+        cmean = cmean + s[na]*lvm[cur_index]
+        cstdev = cstdev - s[na] * rr[na]
     if lktype == 0 or lktype == 2:
         cmean = cmean + (1.0-sumwts)*gmean
 #    print('cmean'); print(cmean)

 # Error message if negative variance:
     if cstdev < 0.0:
-#        print('ERROR: Negative Variance: ' + str(cstdev))
+        # print('ERROR: Negative Variance: ' + str(cstdev))
         cstdev = 0.0
-    cstdev = math.sqrt(max(cstdev,0.0))
+    cstdev = math.sqrt(max(cstdev, 0.0))
 #    print('kriging estimate and variance' + str(cmean) + ', ' + str(cstdev))
     return cmean, cstdev

-def getindex(nc,cmn,csiz,loc):
+
+def getindex(nc, cmn, csiz, loc):
     ic = min(int((loc - cmn) / csiz), nc - 1)
     return ic

+
 def correct_trend(trend):
     """Correct an indicator-based trend model for closure (probabilities sum to 1.0).
     :param trend: ndarray [ny,nx,ncut]
@@ -1458,17 +1557,18 @@ def correct_trend(trend):
     ny = trend.shape[0]
     nx = trend.shape[1]
     ncut = trend.shape[2]
-    for iy in range(0,ny):
-        for ix in range(0,nx):
+    for iy in range(0, ny):
+        for ix in range(0, nx):
             sum = 0.0
-            for ic in range(0,ncut):
-                sum = sum + trend[iy,ix,ic]
+            for ic in range(0, ncut):
+                sum = sum + trend[iy, ix, ic]
             if sum > 0.0:
-                for icut in range(0,ncut):
-                    trend[iy,ix,ic] = trend[iy,ix,ic] / sum
+                for icut in range(0, ncut):
+                    trend[iy, ix, icut] = trend[iy, ix, icut] / sum
     return trend

-def ordrel(ivtype,ncut,ccdf):
+
+def ordrel(ivtype, ncut, ccdf):
     """Correct an indicator-based CDF for order relations.
     :param ivtype: variable type, 0 - categorical and 1 - continuous
     :param ncut: number of categories or thresholds
@@ -1478,10 +1578,10 @@ def ordrel(ivtype,ncut,ccdf):
 #    print('input ordering relations'); print(ccdf)
     ccdfo = np.zeros(ncut)
     ccdf1 = np.zeros(ncut)
-    ccdf2 = np.zeros(ncut) # do we need MAXCUT = 100 for these 2?
+    ccdf2 = np.zeros(ncut)  # do we need MAXCUT = 100 for these 2?
# Make sure conditional cdf is within [0,1]: - for i in range(0,ncut): + for i in range(0, ncut): if ccdf[i] < 0.0: ccdf1[i] = 0.0 ccdf2[i] = 0.0 @@ -1496,22 +1596,26 @@ def ordrel(ivtype,ncut,ccdf): # Correct sequentially up, then down, and then average: if ivtype == 0: sumcdf = 0.0 - for i in range(0,ncut): + for i in range(0, ncut): sumcdf = sumcdf + ccdf1[i] - if sumcdf <= 0.0: sumcdf = 1.0 - for i in range(0,ncut): + if sumcdf <= 0.0: + sumcdf = 1.0 + for i in range(0, ncut): ccdfo[i] = ccdf1[i] / sumcdf else: - for i in range(1,ncut): - if ccdf1[i] < ccdf1[i-1]: ccdf1[i] = ccdf1[i-1] - for i in range(ncut-2,0,-1): - if ccdf2[i] > ccdf2[i+1]: ccdf2[i] = ccdf2[i+1] - for i in range(0,ncut): + for i in range(1, ncut): + if ccdf1[i] < ccdf1[i-1]: + ccdf1[i] = ccdf1[i-1] + for i in range(ncut-2, 0, -1): + if ccdf2[i] > ccdf2[i+1]: + ccdf2[i] = ccdf2[i+1] + for i in range(0, ncut): ccdfo[i] = 0.5*(ccdf1[i]+ccdf2[i]) # Return with corrected CDF: return ccdfo + def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): """GSLIB's DECLUS program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at @@ -1742,21 +1846,7 @@ def gam(array, tmin, tmax, xsiz, ysiz, ixd, iyd, nlag, isill): return lag, vario, npp -def gamv( - df, - xcol, - ycol, - vcol, - tmin, - tmax, - xlag, - xltol, - nlag, - azm, - atol, - bandwh, - isill, -): +def gamv(df, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill): """GSLIB's GAMV program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (Jan, 2019). @@ -1785,6 +1875,13 @@ def gamv( y = df_extract[ycol].values vr = df_extract[vcol].values + return _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, + isill) + + +@jit(**JITKW, parallel=True) +def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill): + # Summary statistics for the data after trimming avg = vr.mean() # TODO: not used stdev = vr.std() @@ -1803,7 +1900,7 @@ def gamv( ) # Standardize sill to one by dividing all variogram values by the variance - for il in range(0, nlag + 2): + for il in nb.prange(0, nlag + 2): if isill == 1: vario[il] = vario[il] / sills @@ -1813,7 +1910,7 @@ def gamv( return dis, vario, npp -@jit(nopython=True) +@jit(**JITKW, parallel=True) def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): """Calculate the variogram by looping over combinatorial of data pairs. :param x: x values @@ -1862,8 +1959,8 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): dismxs = ((float(nlag) + 0.5 - EPSLON) * xlag) ** 2 # Main loop over all pairs - for i in range(0, nd): - for j in range(0, nd): + for i in nb.prange(0, nd): + for j in nb.prange(0, nd): # Definition of the lag corresponding to the current pair dx = x[j] - x[i] @@ -1970,7 +2067,8 @@ def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh): return dis, vario, npp -def varmapv(df,xcol,ycol,vcol,tmin,tmax,nxlag,nylag,dxlag,dylag,minnp,isill): + +def varmapv(df, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, isill): """Calculate the variogram map from irregularly spaced data. 
:param df: DataFrame with the spatial data, xcol, ycol, vcol coordinates and property columns
     :param xcol: DataFrame column with x coordinate
     :param ycol: DataFrame column with y coordinate
     :param vcol: DataFrame column with value of interest
     :param tmin: lower trimming limit
     :param tmax: upper trimming limit
     :param nxlag: number of lags in the x direction
     :param nylag: number of lags in the y direction
     :param dxlag: size of the lags in the x direction
     :param dylag: size of the lags in the y direction
     :param minnp: minimum number of pairs to calculate a variogram value
     :param isill: standardize sill to be 1.0
     :return: TODO
     """
 # Load the data
-    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax
+    # trim values outside tmin and tmax
+    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)]
     nd = len(df_extract)
     x = df_extract[xcol].values
     y = df_extract[ycol].values
     vr = df_extract[vcol].values

+    return _varmapv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp,
+                    isill)
+
+
+@jit(**JITKW, parallel=False)  # runs faster without parallelization
+def _varmapv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, isill):
+    """Calculate the variogram map from irregularly spaced data.
+    :param nd: number of data
+    :param x: x coordinates of the data
+    :param y: y coordinates of the data
+    :param vr: values of interest at the data locations
+    :param tmin: lower trimming limit
+    :param tmax: upper trimming limit
+    :param nxlag: number of lags in the x direction
+    :param nylag: number of lags in the y direction
+    :param dxlag: size of the lags in the x direction
+    :param dylag: size of the lags in the y direction
+    :param minnp: minimum number of pairs to calculate a variogram value
+    :param isill: standardize sill to be 1.0
+    :return: TODO
+    """
+
 # Summary statistics for the data after trimming
     avg = vr.mean()
     stdev = vr.std()
@@ -2002,63 +2123,65 @@
     vrmax = vr.max()

 # Initialize the summation arrays
-    npp = np.zeros((nylag*2+1,nxlag*2+1))
-    gam = np.zeros((nylag*2+1,nxlag*2+1))
-    nppf = np.zeros((nylag*2+1,nxlag*2+1))
-    gamf = np.zeros((nylag*2+1,nxlag*2+1))
-    hm = np.zeros((nylag*2+1,nxlag*2+1))
-    tm = np.zeros((nylag*2+1,nxlag*2+1))
-    hv = np.zeros((nylag*2+1,nxlag*2+1))
-    tv = np.zeros((nylag*2+1,nxlag*2+1))
+    npp = np.zeros((nylag*2+1, nxlag*2+1))
+    gam = np.zeros((nylag*2+1, nxlag*2+1))
+    nppf = np.zeros((nylag*2+1, nxlag*2+1))
+    gamf = np.zeros((nylag*2+1, nxlag*2+1))
+    hm = np.zeros((nylag*2+1, nxlag*2+1))
+    tm = np.zeros((nylag*2+1, nxlag*2+1))
+    hv = np.zeros((nylag*2+1, nxlag*2+1))
+    tv = np.zeros((nylag*2+1, nxlag*2+1))

 # First fix the location of a seed point:
-    for i in range(0,nd):
+    for i in nb.prange(0, nd):
 # Second loop over the data:
-        for j in range(0,nd):
+        for j in nb.prange(0, nd):
 # The lag:
             ydis = y[j] - y[i]
             iyl = nylag + int(ydis/dylag)
-            if iyl < 0 or iyl > nylag*2: # acocunting for 0,...,n-1 array indexing
+            if iyl < 0 or iyl > nylag*2:  # accounting for 0,...,n-1 array indexing
                 continue
             xdis = x[j] - x[i]
             ixl = nxlag + int(xdis/dxlag)
-            if ixl < 0 or ixl > nxlag*2: # acocunting for 0,...,n-1 array indexing
+            if ixl < 0 or ixl > nxlag*2:  # accounting for 0,...,n-1 array indexing
                 continue

 # We have an acceptable pair, therefore accumulate all the statistics
 # that are required for the variogram:
-            npp[iyl,ixl] = npp[iyl,ixl] + 1 # our ndarrays read from the base to top, so we flip
-            tm[iyl,ixl] = tm[iyl,ixl] + vr[i]
-            hm[iyl,ixl] = hm[iyl,ixl] + vr[j]
-            tv[iyl,ixl] = tm[iyl,ixl] + vr[i]*vr[i]
-            hv[iyl,ixl] = hm[iyl,ixl] + vr[j]*vr[j]
-            gam[iyl,ixl] = gam[iyl,ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j]))
+            # our ndarrays read from the base to top, so we flip
+            npp[iyl, ixl] = npp[iyl, ixl] + 1
+            tm[iyl, ixl] = tm[iyl, ixl] + vr[i]
+            hm[iyl, ixl] = hm[iyl, ixl] + vr[j]
+            # head and tail sums of squares accumulate into tv and hv
+            tv[iyl, ixl] = tv[iyl, ixl] + vr[i]*vr[i]
+            hv[iyl, ixl] = hv[iyl, ixl] + vr[j]*vr[j]
+            gam[iyl, ixl] = gam[iyl, ixl] + ((vr[i]-vr[j])*(vr[i]-vr[j]))

 # Get average values for gam, hm, tm, hv, and tv, then compute
 # the correct "variogram" measure:
-    for iy in range(0,nylag*2+1):
-        for ix in range(0,nxlag*2+1):
-            if npp[iy,ix] <= minnp:
-                gam[iy,ix] = -999.
-                hm[iy,ix] = -999.
-                tm[iy,ix] = -999.
-                hv[iy,ix] = -999.
-                tv[iy,ix] = -999.
+    for iy in nb.prange(0, nylag*2+1):
+        for ix in nb.prange(0, nxlag*2+1):
+            if npp[iy, ix] <= minnp:
+                gam[iy, ix] = -999.
+                hm[iy, ix] = -999.
+                tm[iy, ix] = -999.
+                hv[iy, ix] = -999.
+                tv[iy, ix] = -999.
             else:
-                rnum = npp[iy,ix]
-                gam[iy,ix] = gam[iy,ix] / (2*rnum) # semivariogram
-                hm[iy,ix] = hm[iy,ix] / rnum
-                tm[iy,ix] = tm[iy,ix] / rnum
-                hv[iy,ix] = hv[iy,ix] / rnum - hm[iy,ix]*hm[iy,ix]
-                tv[iy,ix] = tv[iy,ix] / rnum - tm[iy,ix]*tm[iy,ix]
+                rnum = npp[iy, ix]
+                gam[iy, ix] = gam[iy, ix] / (2*rnum)  # semivariogram
+                hm[iy, ix] = hm[iy, ix] / rnum
+                tm[iy, ix] = tm[iy, ix] / rnum
+                hv[iy, ix] = hv[iy, ix] / rnum - hm[iy, ix]*hm[iy, ix]
+                tv[iy, ix] = tv[iy, ix] / rnum - tm[iy, ix]*tm[iy, ix]

 # Attempt to standardize:
             if isill > 0:
-                gamf[iy,ix] = gamf[iy,ix]/sills
-    for iy in range(0,nylag*2+1):
-        for ix in range(0,nxlag*2+1):
-            gamf[iy,ix] = gam[nylag*2-iy,ix]
-            nppf[iy,ix] = npp[nylag*2-iy,ix]
+                # divide the semivariogram itself by the sill
+                gam[iy, ix] = gam[iy, ix] / sills
+    for iy in nb.prange(0, nylag*2+1):
+        for ix in nb.prange(0, nxlag*2+1):
+            gamf[iy, ix] = gam[nylag*2-iy, ix]
+            nppf[iy, ix] = npp[nylag*2-iy, ix]

     return gamf, nppf

+
 def vmodel(
     nlag,
     xlag,
@@ -2076,11 +2199,11 @@ def vmodel(
     """

 # Parameters
-    MAXNST=4
-    DEG2RAD=3.14159265/180.0
-    MAXROT=MAXNST+1
+    MAXNST = 4
+    DEG2RAD = 3.14159265/180.0
+    MAXROT = MAXNST+1
     EPSLON = 1.0e-20
-    VERSION= 1.01
+    VERSION = 1.01

 # Declare arrays
     index = np.zeros(nlag+1)
@@ -2115,19 +2238,21 @@ def vmodel(
     print('  x,y,z offsets = ' + str(xoff) + ',' + str(yoff))

     rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, 99999.9)
-
-    xx = 0.0; yy = 0.0
-    for il in range(0,nlag+1):
+    xx = 0.0
+    yy = 0.0
+    for il in range(0, nlag+1):
         index[il] = il
-        cov[il] = cova2(0.0,0.0,xx,yy,nst,c0,9999.9,cc,aa,it,ang,anis,rotmat,maxcov)
+        cov[il] = cova2(0.0, 0.0, xx, yy, nst, c0, 9999.9,
+                        cc, aa, it, ang, anis, rotmat, maxcov)
         gam[il] = maxcov - cov[il]
-        ro[il] = cov[il]/maxcov
-        h[il] = math.sqrt(max((xx*xx+yy*yy),0.0))
+        ro[il] = cov[il]/maxcov
+        h[il] = math.sqrt(max((xx*xx+yy*yy), 0.0))
         xx = xx + xoff
         yy = yy + yoff

 # finished
-    return index,h,gam,cov,ro
+    return index, h, gam, cov, ro
+

 def nscore(
     df, vcol, wcol=None, ismooth=False, dfsmooth=None, smcol=0, smwcol=0
@@ -2205,6 +2330,7 @@ def nscore(
     _dists = []
     _weights = []

+
 def kb2d(
     df,
     xcol,
@@ -2266,15 +2392,24 @@ def kb2d(

 # load the variogram
     nst = vario['nst']
-    cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst)
-    ang = np.zeros(nst); anis = np.zeros(nst)
+    cc = np.zeros(nst)
+    aa = np.zeros(nst)
+    it = np.zeros(nst)
+    ang = np.zeros(nst)
+    anis = np.zeros(nst)

-    c0 = vario['nug'];
-    cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1'];
-    aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1'];
+    c0 = vario['nug']
+    cc[0] = vario['cc1']
+    it[0] = vario['it1']
+    ang[0] = vario['azi1']
+    aa[0] = vario['hmaj1']
+    anis[0] = vario['hmin1']/vario['hmaj1']
     if nst == 2:
-        cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2'];
-        aa[1] = vario['hmaj2']; anis[1] =
vario['hmin2']/vario['hmaj2']; + cc[1] = vario['cc2'] + it[1] = vario['it2'] + ang[1] = vario['azi2'] + aa[1] = vario['hmaj2'] + anis[1] = vario['hmin2']/vario['hmaj2'] # Allocate the needed memory: xdb = np.zeros(MAXDIS) @@ -2288,21 +2423,23 @@ def kb2d( rr = np.zeros(MAXKD) s = np.zeros(MAXKD) a = np.zeros(MAXKRG) - kmap = np.zeros((nx,ny)) - vmap = np.zeros((nx,ny)) + kmap = np.zeros((nx, ny)) + vmap = np.zeros((nx, ny)) # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) - ndmax = min(ndmax,nd) + ndmax = min(ndmax, nd) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, nd)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics for the data after trimming avg = vr.mean() @@ -2321,40 +2458,42 @@ def kb2d( # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): - ndb = nxdis * nydis + ndb = nxdis * nydis if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') return kmap - xdis = xsiz / max(float(nxdis),1.0) - ydis = ysiz / max(float(nydis),1.0) + xdis = xsiz / max(float(nxdis), 1.0) + ydis = ysiz / max(float(nydis), 1.0) xloc = -0.5*(xsiz+xdis) - i = -1 # accounting for 0 as lowest index - for ix in range(0,nxdis): + i = -1 # accounting for 0 as lowest index + for ix in range(0, nxdis): xloc = xloc + xdis yloc = -0.5*(ysiz+ydis) - for iy in range(0,nydis): + for iy in range(0, nydis): yloc = yloc + ydis i = i+1 xdb[i] = xloc ydb[i] = yloc # Initialize accumulators: - cbb = 0.0 + cbb = 0.0 rad2 = radius*radius # Calculate Block Covariance. Check for point kriging. 
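# The block covariance cbb computed below is built from the point
# covariance evaluated between every pair of block discretization
# offsets, with the nugget c0 removed from the i == j terms (GSLIB then
# normalizes the sum by ndb*ndb to make it a true average). A toy sketch
# with a made-up covariance function standing in for cova2 (illustrative
# only):
#
#     >>> import numpy as np
#     >>> pts = [-2.5, 2.5]                         # 1D offsets, ndb = 2
#     >>> cov = lambda h: max(0.0, 1.0 - h / 10.0)  # assumed toy model, c0 = 0
#     >>> cbb = np.mean([[cov(abs(p - q)) for p in pts] for q in pts])
#     >>> float(cbb)
#     0.75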
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(xdb[0],ydb[0],xdb[0],ydb[0],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX) + cov = cova2(xdb[0], ydb[0], xdb[0], ydb[0], nst, c0, + PMX, cc, aa, it, ang, anis, rotmat, maxcov) # Keep this value to use for the unbiasedness constraint: unbias = cov - first = False + first = False if ndb <= 1: cbb = cov else: - for i in range(0,ndb): - for j in range(0,ndb): - cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + for i in range(0, ndb): + for j in range(0, ndb): + cov = cova2(xdb[i], ydb[i], xdb[j], ydb[j], nst, + c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) if i == j: cov = cov - c0 cbb = cbb + cov @@ -2364,51 +2503,57 @@ def kb2d( nk = 0 ak = 0.0 vk = 0.0 - for iy in range(0,ny): + for iy in range(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in range(0, nx): xloc = xmn + (ix-0)*xsiz - current_node = (yloc,xloc) + current_node = (yloc, xloc) # Find the nearest samples within each octant: First initialize # the counter arrays: na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius) # use kd tree for fast nearest data search + # use kd tree for fast nearest data search + dist, nums = tree.query( + current_node, ndmax, distance_upper_bound=radius) # remove any data outside search radius na = len(dist) - nums = nums[dist UNEST: nk = nk + 1 ak = ak + est @@ -2520,18 +2670,19 @@ def kb2d( return kmap, vmap + _nums = None _dists = None _weights = None -@jit(nopython=True) -def kb2d_jit_main( - tree, nd, x, y, vr, - xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, - ndmax, radius, ktype, skmean, vario_int, vario_float): +@jit(**JITKW, parallel=False) # numba crashed on parallel=True +def _kb2d_jit( + tree, nd, x, y, vr, + xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, + ndmax, radius, ktype, skmean, vario_int, vario_float): -# Constants + # Constants UNEST = -999. EPSLON = 1.0e-10 VERSION = 2.907 @@ -2542,7 +2693,7 @@ def kb2d_jit_main( MAXKD = MAXSAM + 1 MAXKRG = MAXKD * MAXKD - ndmax = min(ndmax,nd) + ndmax = min(ndmax, nd) # load the variogram nst = vario_int['nst'] @@ -2564,9 +2715,8 @@ def kb2d_jit_main( rr = np.zeros(MAXKD) s = np.zeros(MAXKD) a = np.zeros(MAXKRG) - kmap = np.zeros((nx,ny)) - vmap = np.zeros((nx,ny)) - + kmap = np.zeros((nx, ny)) + vmap = np.zeros((nx, ny)) c0 = vario_float['nug'] cc[0] = vario_float['cc1'] @@ -2593,7 +2743,7 @@ def kb2d_jit_main( # Set up the discretization points per block. Figure out how many # are needed, the spacing, and fill the xdb and ydb arrays with the # offsets relative to the block center (this only gets done once): - ndb = nxdis * nydis + ndb = nxdis * nydis if ndb > MAXDIS: print('ERROR KB2D: Too many discretization points ') print(' Increase MAXDIS or lower n[xy]dis') @@ -2601,7 +2751,7 @@ def kb2d_jit_main( xdis = xsiz / max(float(nxdis), 1.0) ydis = ysiz / max(float(nydis), 1.0) xloc = -0.5 * (xsiz + xdis) - i = -1 # accounting for 0 as lowest index + i = -1 # accounting for 0 as lowest index for ix in range(0, nxdis): xloc = xloc + xdis yloc = -0.5 * (ysiz+ydis) @@ -2612,24 +2762,26 @@ def kb2d_jit_main( ydb[i] = yloc # Initialize accumulators: - cbb = 0.0 + cbb = 0.0 rad2 = radius * radius # Calculate Block Covariance. Check for point kriging. 
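# A note on JITKW, used in the @jit decorators throughout this patch: no
# hunk shown here defines it, so it is presumably a module-level dict of
# shared numba options introduced elsewhere in the series. A minimal
# sketch of one plausible definition (an assumption, not taken from the
# patch):
#
#     >>> import numba as nb
#     >>> JITKW = {'nopython': True, 'cache': True}
#     >>> @nb.jit(**JITKW, parallel=False)
#     ... def add_one(x):
#     ...     return x + 1
#     >>> add_one(41)
#     42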
- rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX) - cov = cova2(xdb[0],ydb[0],xdb[0],ydb[0],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX) + cov = cova2(xdb[0], ydb[0], xdb[0], ydb[0], nst, c0, + PMX, cc, aa, it, ang, anis, rotmat, maxcov) # Keep this value to use for the unbiasedness constraint: unbias = cov - first = False + first = False if ndb <= 1: cbb = cov else: - for i in range(0, ndb): - for j in range(0, ndb): - cov = cova2(xdb[i],ydb[i],xdb[j],ydb[j],nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov) + for i in nb.prange(0, ndb): + for j in nb.prange(0, ndb): + cov = cova2(xdb[i], ydb[i], xdb[j], ydb[j], nst, + c0, PMX, cc, aa, it, ang, anis, rotmat, maxcov) if i == j: cov = cov - c0 - cbb = cbb + cov + cbb += cov # print(real) # cbb = cbb/real(ndb*ndb) # TODO: undefined? @@ -2638,9 +2790,9 @@ def kb2d_jit_main( ak = 0.0 vk = 0.0 - for iy in range(0,ny): + for iy in nb.prange(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in nb.prange(0, nx): xloc = xmn + (ix-0)*xsiz current_node = (yloc, xloc) @@ -2653,38 +2805,42 @@ def kb2d_jit_main( dist, nums = tree[current_node] # remove any data outside search radius na = len(dist) - nums = nums[dist UNEST: nk = nk + 1 ak = ak + est @@ -2837,7 +2997,8 @@ def kb2d_jit( :return: """ # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) x = df_extract[xcol].values y = df_extract[ycol].values @@ -2845,20 +3006,23 @@ def kb2d_jit( # Make a KDTree for fast search of nearest neighbours # dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) + data_locs = np.column_stack((y, x)) import scipy.spatial as sp - _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) vario_int = Dict.empty( - key_type = nb.types.unicode_type, - value_type = nb.types.i8 + key_type=nb.types.unicode_type, + value_type=nb.types.i8 ) vario_float = Dict.empty( - key_type = nb.types.unicode_type, - value_type = nb.f8 + key_type=nb.types.unicode_type, + value_type=nb.f8 ) + vario = copy(vario) + vario_float['nug'] = vario.pop('nug') vario_float['cc1'] = vario.pop('cc1') vario_float['cc2'] = vario.pop('cc2') @@ -2867,18 +3031,19 @@ def kb2d_jit( vario_int[key] = vario[key] tree = Dict.empty( - key_type = nb.types.Tuple((nb.f8, nb.f8)), - value_type = nb.types.Tuple((nb.f8[:], nb.i4[:])) + key_type=nb.types.Tuple((nb.f8, nb.f8)), + value_type=nb.types.Tuple((nb.f8[:], nb.i4[:])) ) - for iy in range(0,ny): + for iy in range(0, ny): yloc = ymn + (iy-0)*ysiz - for ix in range(0,nx): + for ix in range(0, nx): xloc = xmn + (ix-0)*xsiz - current_node = (yloc,xloc) - tree[current_node] = _tree.query(current_node, ndmax, distance_upper_bound=radius) + current_node = (yloc, xloc) + tree[current_node] = _tree.query( + current_node, ndmax, distance_upper_bound=radius) - kmap, vmap = kb2d_jit_main( + kmap, vmap = _kb2d_jit( tree, nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin, ndmax, radius, ktype, skmean, vario_int, vario_float) @@ -2886,8 +3051,7 @@ def kb2d_jit( return kmap, vmap -def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xmn,xsiz,ny,ymn,ysiz,ndmin,ndmax,radius,ktype,vario): - +def 
ik2d(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, ndmin, ndmax, radius, ktype, vario): """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (March, 2019). @@ -2925,30 +3089,42 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm MAXEQ = MAXSAM + 1 mik = 0 # full indicator kriging use_trend = False - if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: use_trend = True + if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut: + use_trend = True # load the variogram MAXNST = 2 - nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((MAXNST,ncut)) - aa = np.zeros((MAXNST,ncut),dtype=int); it = np.zeros((MAXNST,ncut),dtype=int) - ang = np.zeros((MAXNST,ncut)); anis = np.zeros((MAXNST,ncut)) - - for icut in range(0,ncut): + nst = np.zeros(ncut, dtype=int) + c0 = np.zeros(ncut) + cc = np.zeros((MAXNST, ncut)) + aa = np.zeros((MAXNST, ncut), dtype=int) + it = np.zeros((MAXNST, ncut), dtype=int) + ang = np.zeros((MAXNST, ncut)) + anis = np.zeros((MAXNST, ncut)) + + for icut in range(0, ncut): nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[0,icut] = vario[icut]['cc1']; it[0,icut] = vario[icut]['it1']; - ang[0,icut] = vario[icut]['azi1']; - aa[0,icut] = vario[icut]['hmaj1']; anis[0,icut] = vario[icut]['hmin1']/vario[icut]['hmaj1']; + c0[icut] = vario[icut]['nug'] + cc[0, icut] = vario[icut]['cc1'] + it[0, icut] = vario[icut]['it1'] + ang[0, icut] = vario[icut]['azi1'] + aa[0, icut] = vario[icut]['hmaj1'] + anis[0, icut] = vario[icut]['hmin1']/vario[icut]['hmaj1'] if nst[icut] == 2: - cc[1,icut] = vario[icut]['cc2']; it[1,icut] = vario[icut]['it2']; ang[1,icut] = vario[icut]['azi2']; - aa[1,icut] = vario[icut]['hmaj2']; anis[1,icut] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + cc[1, icut] = vario[icut]['cc2'] + it[1, icut] = vario[icut]['it2'] + ang[1, icut] = vario[icut]['azi2'] + aa[1, icut] = vario[icut]['hmaj2'] + anis[1, icut] = vario[icut]['hmin2']/vario[icut]['hmaj2'] # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] MAXDAT = len(df_extract) MAXCUT = ncut MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) + MAXROT = MAXNST*MAXCUT + 1 + ikout = np.zeros((nx, ny, ncut)) maxcov = np.zeros(ncut) # Allocate the needed memory: @@ -2961,8 +3137,8 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm rr = np.zeros(MAXEQ) s = np.zeros(MAXEQ) a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) + ikmap = np.zeros((nx, ny, ncut)) + vr = np.zeros((MAXDAT, MAXCUT+1)) nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) @@ -2970,7 +3146,7 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) + ikout = np.zeros((nx, ny, ncut)) x = df_extract[xcol].values y = df_extract[ycol].values @@ -2980,31 +3156,33 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm # data value. 
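# A concrete miniature of the indicator coding described above
# (illustrative values). Categorical (ivtype = 0): with category codes
# thresh = [1, 2, 3], a datum v = 2 codes as [0, 1, 0], since only
# category 2 lies within +/-0.5 of it. Continuous (ivtype = 1): with
# thresh = [1.0, 2.5], v = 1.7 codes as [0, 1], since 1.7 <= 2.5 only.
# The same construction with numeric 0/1 (numpy coerces the string
# literals '1'/'0' used below when assigning into the float array vr):
#
#     >>> import numpy as np
#     >>> v = np.array([0.5, 1.7, 3.0])
#     >>> np.where(v <= 2.5, 1.0, 0.0)
#     array([1., 1., 0.])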
if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + vr[:, icut] = np.where( + (v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v + for icut in range(0, ncut): + vr[:, icut] = np.where(v <= thresh[icut], '1', '0') + vr[:, ncut] = v # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, MAXDAT)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics of the input data - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() + avg = vr[:, ncut].mean() + stdev = vr[:, ncut].std() ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() + vrmin = vr[:, ncut].min() + vrmax = vr[:, ncut].max() print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - actloc = np.zeros(MAXDAT, dtype = int) - for i in range(1,MAXDAT): + actloc = np.zeros(MAXDAT, dtype=int) + for i in range(1, MAXDAT): actloc[i] = i # Set up the rotation/anisotropy matrices that are needed for the @@ -3013,38 +3191,40 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[:,ic],cc[:,ic],ang[:,ic],9999.9) + for ic in range(0, ncut): + rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic], int( + nst[ic]), it[:, ic], cc[:, ic], ang[:, ic], 9999.9) rotmat.append(rotmat_temp) # Initialize accumulators: # not setup yet nk = 0 xk = 0.0 vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 + for icut in range(0, ncut): + nviol[icut] = 0 + aviol[icut] = 0.0 xviol[icut] = -1.0 nxy = nx*ny print('Working on the kriging') # Report on progress from time to time: if koption == 0: - nxy = nx*ny + nxy = nx*ny nloop = nxy - irepo = max(1,min((nxy/10),10000)) + irepo = max(1, min((nxy/10), 10000)) else: nloop = 10000000 - irepo = max(1,min((nd/10),10000)) + irepo = max(1, min((nd/10), 10000)) ddh = 0.0 # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: - for index in range(0,nloop): + for index in range(0, nloop): - if (int(index/irepo)*irepo) == index: print(' currently on estimate ' + str(index)) + if (int(index/irepo)*irepo) == index: + print(' currently on estimate ' + str(index)) if koption == 0: - iy = int((index)/nx) - ix = index - (iy)*nx + iy = int((index)/nx) + ix = index - (iy)*nx xloc = xmn + (ix)*xsiz yloc = ymn + (iy)*ysiz else: @@ -3055,92 +3235,98 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm na = -1 # accounting for 0 as first index dist.fill(1.0e+20) nums.fill(-1) - current_node = (yloc,xloc) - dist, close = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + current_node = (yloc, xloc) + # use kd tree for fast nearest data search + dist, close = tree.query(current_node, ndmax) # remove any data outside search radius - close = close[dist= 1: krig = False + if mik == 1 and ic >= 1: + krig = False # Identify the close data (there may be a 
different number of data at
 # each threshold because of constraint intervals); however, if
 # there are no constraint intervals then this step can be avoided.
                 nca = -1
-                for ia in range(0,nclose):
-                    j = int(close[ia]+0.5)
+                for ia in range(0, nclose):
+                    j = int(close[ia]+0.5)
                     ii = actloc[j]
                     accept = True
-                    if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)).lt.EPSLON: accept = False
+                    if koption != 0 and (abs(x[j]-xloc) + abs(y[j]-yloc)) < EPSLON:
+                        accept = False
                     if accept:
                         nca = nca + 1
-                        vra[nca] = vr[ii,ic]
-                        xa[nca] = x[j]
-                        ya[nca] = y[j]
+                        vra[nca] = vr[ii, ic]
+                        xa[nca] = x[j]
+                        ya[nca] = y[j]

 # If there are no samples at this threshold then use the global cdf:
                 if nca == -1:
                     if use_trend:
-                        ccdf[ic] = trend[ny-iy-1,ix,ic]
+                        ccdf[ic] = trend[ny-iy-1, ix, ic]
                     else:
                         ccdf[ic] = gcdf[ic]
                 else:
-# Now, only load the variogram, build the matrix,... if kriging:
+                    # Now, only load the variogram, build the matrix,... if kriging:
                     neq = nclose + ktype
                     na = nclose

 # Set up kriging matrices:
-                    iin=-1 # accounting for first index of 0
-                    for j in range(0,na):
-# Establish Left Hand Side Covariance Matrix:
-                        for i in range(0,na):  # was j - want full matrix
+                    iin = -1  # accounting for first index of 0
+                    for j in range(0, na):
+                        # Establish Left Hand Side Covariance Matrix:
+                        for i in range(0, na):  # was j - want full matrix
                             iin = iin + 1
-                            a[iin] = cova2(xa[i],ya[i],xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic])
+                            a[iin] = cova2(xa[i], ya[i], xa[j], ya[j], nst[ic], c0[ic], PMX, cc[:, ic],
+                                           aa[:, ic], it[:, ic], ang[:, ic], anis[:, ic], rotmat[ic], maxcov[ic])
                         if ktype == 1:
                             iin = iin + 1
                             a[iin] = maxcov[ic]
-                        r[j] = cova2(xloc,yloc,xa[j],ya[j],nst[ic],c0[ic],PMX,cc[:,ic],aa[:,ic],it[:,ic],ang[:,ic],anis[:,ic],rotmat[ic],maxcov[ic])
+                        r[j] = cova2(xloc, yloc, xa[j], ya[j], nst[ic], c0[ic], PMX, cc[:, ic],
+                                     aa[:, ic], it[:, ic], ang[:, ic], anis[:, ic], rotmat[ic], maxcov[ic])

 # Set the unbiasedness constraint:
                     if ktype == 1:
-                        for i in range(0,na):
+                        for i in range(0, na):
                             iin = iin + 1
                             a[iin] = maxcov[ic]
-                        iin = iin + 1
-                        a[iin] = 0.0
-                        r[neq-1] = maxcov[ic]
+                        iin = iin + 1
+                        a[iin] = 0.0
+                        r[neq-1] = maxcov[ic]
                         rr[neq-1] = r[neq-1]

 # Solve the system:
                     if neq == 1:
                         ising = 0.0
-                        s[0] = r[0] / a[0]
+                        s[0] = r[0] / a[0]
                     else:
-                        s = ksol_numpy(neq,a,r)
+                        s = ksol_numpy(neq, a, r)

 # Finished kriging (if it was necessary):

 # Compute Kriged estimate of cumulative probability:
-                    sumwts = 0.0
+                    sumwts = 0.0
                     ccdf[ic] = 0.0
-                    for i in range(0,nclose):
+                    for i in range(0, nclose):
                         ccdf[ic] = ccdf[ic] + vra[i]*s[i]
-                        sumwts = sumwts + s[i]
+                        sumwts = sumwts + s[i]
                     if ktype == 0:
                         if use_trend == True:
-                            ccdf[ic] = ccdf[ic] + (1.0-sumwts)*trend[ny-iy-1,ix,ic]
+                            ccdf[ic] = ccdf[ic] + \
+                                (1.0-sumwts)*trend[ny-iy-1, ix, ic]
                         else:
                             ccdf[ic] = ccdf[ic] + (1.0-sumwts)*gcdf[ic]

@@ -3148,65 +3334,85 @@
 # Correct and write the distribution to the output file:
             nk = nk + 1
-            ccdfo = ordrel(ivtype,ncut,ccdf)
+            ccdfo = ordrel(ivtype, ncut, ccdf)

 # Write the IK CCDF for this grid node:
             if koption == 0:
-                ikout[ny-iy-1,ix,:] = ccdfo
+                ikout[ny-iy-1, ix, :] = ccdfo
             else:
-                print('TBD')
+                print('TBD')
     return ikout

-def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtcol,zmin,zmax,ltail,ltpar,utail,utpar,nsim,
-          nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,ndmax,nodmax,mults,nmult,noct,radius,radius1,sang1,
-          mxctx,mxcty,ktype,colocorr,sec_map,vario):
-# Parameters from
sgsim.inc - MAXNST=2; MAXROT=2; UNEST=-99.0; EPSLON=1.0e-20; VERSION=2.907 - KORDEI=12; MAXOP1=KORDEI+1; MAXINT=2**30 +def sgsim(df, xcol, ycol, vcol, wcol, scol, tmin, tmax, itrans, ismooth, dftrans, tcol, twtcol, zmin, zmax, ltail, ltpar, utail, utpar, nsim, + nx, xmn, xsiz, ny, ymn, ysiz, seed, ndmin, ndmax, nodmax, mults, nmult, noct, radius, radius1, sang1, + mxctx, mxcty, ktype, colocorr, sec_map, vario): + + # Parameters from sgsim.inc + MAXNST = 2 + MAXROT = 2 + UNEST = -99.0 + EPSLON = 1.0e-20 + VERSION = 2.907 + KORDEI = 12 + MAXOP1 = KORDEI+1 + MAXINT = 2**30 # Set other parameters np.random.seed(seed) nxy = nx*ny - sstrat = 0 # search data and nodes by default, turned off if unconditional + sstrat = 0 # search data and nodes by default, turned off if unconditional radsqd = radius * radius sanis1 = radius1/radius - if ktype == 4: varred = 1.0 + if ktype == 4: + varred = 1.0 # load the variogram nst = int(vario['nst']) - cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst,dtype=int) - ang = np.zeros(nst); anis = np.zeros(nst) + cc = np.zeros(nst) + aa = np.zeros(nst) + it = np.zeros(nst, dtype=int) + ang = np.zeros(nst) + anis = np.zeros(nst) - c0 = vario['nug']; - cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1']; - aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1']; + c0 = vario['nug'] + cc[0] = vario['cc1'] + it[0] = vario['it1'] + ang[0] = vario['azi1'] + aa[0] = vario['hmaj1'] + anis[0] = vario['hmin1']/vario['hmaj1'] if nst == 2: - cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2']; - aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2']; + cc[1] = vario['cc2'] + it[1] = vario['it2'] + ang[1] = vario['azi2'] + aa[1] = vario['hmaj2'] + anis[1] = vario['hmin2']/vario['hmaj2'] # Set the constants MAXCTX = mxctx MAXCTY = mxcty MAXCXY = MAXCTX * MAXCTY - MAXX = nx - MAXY = ny - MAXZ = 1 # assuming 2D for now - MXY = MAXX * MAXY - if MXY < 100: MXY = 100 + MAXX = nx + MAXY = ny + MAXZ = 1 # assuming 2D for now + MXY = MAXX * MAXY + if MXY < 100: + MXY = 100 MAXNOD = nodmax MAXSAM = ndmax MAXKR1 = MAXNOD + MAXSAM + 1 # print('MAXKR1'); print(MAXKR1) MAXKR2 = MAXKR1 * MAXKR1 MAXSBX = 1 - if nx > 1: + if nx > 1: MAXSBX = int(nx/2) - if MAXSBX > 50: MAXSBX=50 + if MAXSBX > 50: + MAXSBX = 50 MAXSBY = 1 if ny > 1: MAXSBY = int(ny/2) - if MAXSBY > 50: MAXSBY=50 + if MAXSBY > 50: + MAXSBY = 50 MAXSBZ = 1 MAXSB = MAXSBX*MAXSBY*MAXSBZ @@ -3214,12 +3420,14 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # Declare arrays dist = np.zeros(ndmax) - nums = np.zeros(ndmax,dtype = int) + nums = np.zeros(ndmax, dtype=int) # Perform some quick checks - if nx > MAXX or ny> MAXY: - print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') - print(' you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.') + if nx > MAXX or ny > MAXY: + print('ERROR: available grid size: ' + str(MAXX) + + ',' + str(MAXY) + ',' + str(MAXZ) + '.') + print(' you have asked for : ' + str(nx) + + ',' + str(ny) + ',' + str(nz) + '.') return sim if ltail != 1 and ltail != 2: print('ERROR invalid lower tail option ' + str(ltail)) @@ -3243,25 +3451,29 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc return sim # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] nd = len(df_extract) - ndmax 
= min(ndmax,nd) + ndmax = min(ndmax, nd) x = df_extract[xcol].values y = df_extract[ycol].values vr = df_extract[vcol].values vr_orig = np.copy(vr) # print('size of data extract'); print(len(vr)) - wt = []; wt = np.array(wt) + wt = [] + wt = np.array(wt) if wcol > -1: wt = df_extract[wcol].values else: wt = np.ones(nd) - sec = []; sec = np.array(sec) + sec = [] + sec = np.array(sec) if scol > -1: sec = df_extract[scol].values if itrans == 1: if ismooth == 1: - dftrans_extract = dftrans.loc[(dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] + dftrans_extract = dftrans.loc[( + dftrans[tcol] >= tmin) & (dftrans[tcol] <= tmax)] ntr = len(dftrans_extract) vrtr = dftrans_extrac[tcol].values if twtcol > -1: @@ -3274,85 +3486,94 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc vrgtr = np.copy(wt) twt = np.sum(vrgtr) # sort - vrtr,vrgtr = dsortem(0,ntr,vrtr,2,b=vrgtr) + vrtr, vrgtr = dsortem(0, ntr, vrtr, 2, b=vrgtr) # Compute the cumulative probabilities and write transformation table - twt = max(twt,EPSLON) + twt = max(twt, EPSLON) oldcp = 0.0 - cp = 0.0 + cp = 0.0 # print('ntr'); print(ntr) - for j in range(0,ntr): - cp = cp + vrgtr[j]/twt - w = (cp + oldcp)*0.5 + for j in range(0, ntr): + cp = cp + vrgtr[j]/twt + w = (cp + oldcp)*0.5 vrg = gauinv(w) - oldcp = cp + oldcp = cp # Now, reset the weight to the normal scores value: vrgtr[j] = vrg twt = np.sum(wt) # Normal scores transform the data - for id in range(0,nd): + for id in range(0, nd): if itrans == 1: vrr = vr[id] - j = dlocate(vrtr,1,nd,vrr) - j = min(max(0,j),(nd-2)) - vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0) - if vrg < vrgtr[0]: vrg = vrgtr[0] - if(vrg > vrgtr[nd-1]): vrg = vrgtr[nd-1] + j = dlocate(vrtr, 1, nd, vrr) + j = min(max(0, j), (nd-2)) + vrg = dpowint(vrtr[j], vrtr[j+1], vrgtr[j], + vrgtr[j+1], vrr, 1.0) + if vrg < vrgtr[0]: + vrg = vrgtr[0] + if(vrg > vrgtr[nd-1]): + vrg = vrgtr[nd-1] vr[id] = vrg - weighted_stats_orig = DescrStatsW(vr_orig,weights=wt) + weighted_stats_orig = DescrStatsW(vr_orig, weights=wt) orig_av = weighted_stats_orig.mean orig_ss = weighted_stats_orig.var - weighted_stats = DescrStatsW(vr,weights=wt) + weighted_stats = DescrStatsW(vr, weights=wt) av = weighted_stats.mean ss = weighted_stats.var print('\n Data for SGSIM: Number of acceptable data = ' + str(nd)) - print(' Number trimmed = ' + str(len(df)- nd)) - print(' Weighted Average = ' + str(round(orig_av,4))) - print(' Weighted Variance = ' + str(round(orig_ss,4))) - print(' Weighted Transformed Average = ' + str(round(av,4))) - print(' Weighted Transformed Variance = ' + str(round(ss,4))) + print(' Number trimmed = ' + str(len(df) - nd)) + print(' Weighted Average = ' + + str(round(orig_av, 4))) + print(' Weighted Variance = ' + + str(round(orig_ss, 4))) + print(' Weighted Transformed Average = ' + str(round(av, 4))) + print(' Weighted Transformed Variance = ' + str(round(ss, 4))) # Read in secondary data sim = np.random.rand(nx*ny) index = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): sim[index] = index - lvm = []; lvm = np.array(lvm) + lvm = [] + lvm = np.array(lvm) if ktype >= 2: #lvm = np.copy(sec_map.flatten()) ind = 0 lvm = np.zeros(nxy) - for iy in range(0,ny): - for ix in range(0,nx): - lvm[ind] = sec_map[ny-iy-1,ix] + for iy in range(0, ny): + for ix in range(0, nx): + lvm[ind] = sec_map[ny-iy-1, ix] ind = ind + 1 if ktype == 2 and itrans == 1: - for ixy in range(0,nxy): -# Do we to transform the secondary variable for a local mean? 
+            for ixy in range(0, nxy):
+                # Do we need to transform the secondary variable for a local mean?
                 vrr = lvm[ixy]
-                j = dlocate(vrtr,1,ntr,vrr)
-                j = min(max(0,j),(ntr-2))
-                vrg = dpowint(vrtr[j],vrtr[j+1],vrgtr[j],vrgtr[j+1],vrr,1.0)
-                if vrg < vrgtr[0]: vrg = vrgtr[0]
-                if(vrg > vrgtr[ntr-1]): vrg = vrgtr[nd-1]
+                j = dlocate(vrtr, 1, ntr, vrr)
+                j = min(max(0, j), (ntr-2))
+                vrg = dpowint(vrtr[j], vrtr[j+1], vrgtr[j],
+                              vrgtr[j+1], vrr, 1.0)
+                if vrg < vrgtr[0]:
+                    vrg = vrgtr[0]
+                if(vrg > vrgtr[ntr-1]):
+                    vrg = vrgtr[ntr-1]
                 lvm[ixy] = vrg

         av = np.average(lvm)
         ss = np.var(lvm)
         print('  Secondary Data: Number of data = ' + str(nx*ny))
-        print('  Equal Weighted Average = ' + str(round(av,4)))
-        print('  Equal Weighted Variance = ' + str(round(ss,4)))
+        print('  Equal Weighted Average = ' + str(round(av, 4)))
+        print('  Equal Weighted Variance = ' + str(round(ss, 4)))

 # Do we need to work with data residuals? (Locally Varying Mean)
     if ktype == 2:
         sec = np.zeros(nd)
-        for idd in range(0,nd):
-            ix = getindex(nx,xmn,xsiz,x[idd])
-            iy = getindex(ny,ymn,ysiz,y[idd])
+        for idd in range(0, nd):
+            ix = getindex(nx, xmn, xsiz, x[idd])
+            iy = getindex(ny, ymn, ysiz, y[idd])
             index = ix + (iy-1)*nx
             sec[idd] = lvm[index]

 # Calculation of residual moved to krige subroutine: vr(i)=vr(i)-sec(i)
@@ -3360,10 +3581,10 @@
 # Do we need to get an external drift attribute for the data?
     if ktype == 3:
-        for idd in range(0,nd):
+        for idd in range(0, nd):
             if sec[idd] != UNEST:
-                ix = getindx(nx,xmn,xsiz,x[idd])
-                iy = getindx(ny,ymn,ysiz,y[idd])
+                ix = getindex(nx, xmn, xsiz, x[idd])
+                iy = getindex(ny, ymn, ysiz, y[idd])
                 ind = ix + (iy)*nx
                 sec[ind] = lvm[ind]

 # Transform the secondary attribute to normal scores?
     if ktype == 4:
         order_sec = np.zeros(nxy)
         ind = 0
-        for ixy in range(0,nxy):
+        for ixy in range(0, nxy):
             order_sec[ixy] = ind
             ind = ind + 1
         print(' Transforming Secondary Data with')
         print(' variance reduction of ' + str(varred))
-        lvm,order_sec = dsortem(0,nxy,lvm,2,b=order_sec)
+        lvm, order_sec = dsortem(0, nxy, lvm, 2, b=order_sec)
         oldcp = 0.0
-        cp = 0.0
-        for i in range(0,nxy):
-            cp = cp + (1.0/(nxy))
-            w = (cp + oldcp)/2.0
+        cp = 0.0
+        for i in range(0, nxy):
+            cp = cp + (1.0/(nxy))
+            w = (cp + oldcp)/2.0
             lvm[i] = gauinv(w)
             lvm[i] = lvm[i] * varred
-            oldcp = cp
-        order_sec,lvm = dsortem(0,nxy,order_sec,2,b=lvm)
+            oldcp = cp
+        order_sec, lvm = dsortem(0, nxy, order_sec, 2, b=lvm)
 # return np.reshape(lvm,(ny,nx)) # check the transform

 # Set up the rotation/anisotropy matrices that are needed for the
 # variogram and search.
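# Geometric reminder of what setrot encodes for the search: a frame
# rotated to the major-axis azimuth, with the minor-axis component
# inflated by 1/anis, so that anisotropic distances become Euclidean
# distances in the transformed frame. A standalone sketch with assumed
# values, not the patched functions themselves:
#
#     >>> import numpy as np
#     >>> azi, anis = 90.0, 0.5                 # major axis E-W, hmin/hmaj = 0.5
#     >>> a = (90.0 - azi) * np.pi / 180.0
#     >>> rot = np.array([[np.cos(a), np.sin(a)],
#     ...                 [-np.sin(a), np.cos(a)]])
#     >>> dx, dy = rot @ np.array([2.0, 0.0])   # 2 units along the major axis
#     >>> float(np.hypot(dx, dy / anis))
#     2.0
#     >>> dx, dy = rot @ np.array([0.0, 2.0])   # 2 units along the minor axis
#     >>> float(np.hypot(dx, dy / anis))
#     4.0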
print('Setting up rotation matrices for variogram and search') if nst == 1: - rotmat = setrot(ang[0],ang[0],sang1,anis[0],anis[0],sanis1,nst,MAXROT=2) + rotmat = setrot(ang[0], ang[0], sang1, anis[0], + anis[0], sanis1, nst, MAXROT=2) else: - rotmat = setrot(ang[0],ang[1],sang1,anis[0],anis[1],sanis1,nst,MAXROT=2) - isrot = 2 # search rotation is appended as 3rd + rotmat = setrot(ang[0], ang[1], sang1, anis[0], + anis[1], sanis1, nst, MAXROT=2) + isrot = 2 # search rotation is appended as 3rd - rotmat_2d, maxcov = setup_rotmat2(c0,nst,it,cc,ang) # will use one in the future + rotmat_2d, maxcov = setup_rotmat2( + c0, nst, it, cc, ang) # will use one in the future # print('MaxCov = ' + str(maxcov)) # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,nd)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, nd)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Set up the covariance table and the spiral search: - cov_table,tmp,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst,c0,cc,aa,it,ang,anis,rotmat,radsqd) + cov_table, tmp, order, ixnode, iynode, nlooku, nctx, ncty = ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MXY, + xsiz, ysiz, isrot, nx, ny, nst, c0, cc, aa, it, ang, anis, rotmat, radsqd) # print('Covariance Table'); print(cov_table) # MAIN LOOP OVER ALL THE SIMULAUTIONS: - for isim in range(0,nsim): + for isim in range(0, nsim): -# Work out a random path for this realization: + # Work out a random path for this realization: sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): order[ixy] = ind ind = ind + 1 @@ -3425,35 +3650,37 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # somewhat arbitrary): if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) + for imult in range(0, nmult): + nny = int(max(1, ny/((imult+1)*4))) + nnx = int(max(1, nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 + jy = 1 + jx = 1 + for iy in range(0, nny): + if nny > 0: + jy = iy*(imult+1)*4 + for ix in range(0, nnx): + if nnx > 0: + jx = ix*(imult+1)*4 index = jx + (jy-1)*nx sim[index] = sim[index] - (imult+1) # Initialize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) + sim, order = dsortem(0, nxy, sim, 2, b=order) sim.fill(UNEST) print('Working on realization number ' + str(isim)) # Assign the data to the closest grid node: TINY = 0.0001 - for idd in range(0,nd): -# print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) + for idd in range(0, nd): + # print('data'); print(x[idd],y[idd]) + ix = getindex(nx, xmn, xsiz, x[idd]) + iy = getindex(ny, ymn, ysiz, y[idd]) ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) test = abs(xx-x[idd]) + abs(yy-y[idd]) @@ -3468,16 +3695,18 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc sim[ind] = id2 # Assign a flag so that this node does not get 
simulated: - if sstrat == 0 and test <= TINY: sim[ind]=10.0*UNEST + if sstrat == 0 and test <= TINY: + sim[ind] = 10.0*UNEST # Now, enter data values into the simulated grid: - for ind in range(0,nxy): + for ind in range(0, nxy): idd = int(sim[ind]+0.5) - if idd > 0: sim[ind] = vr[id] - irepo = max(1,min((nxy/10),10000)) + if idd > 0: + sim[ind] = vr[id] + irepo = max(1, min((nxy/10), 10000)) # MAIN LOOP OVER ALL THE NODES: - for ind in range(0,nxy): + for ind in range(0, nxy): if (int(ind/irepo)*irepo) == ind: print(' currently on node ' + str(ind)) @@ -3485,12 +3714,13 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # not been assigned a value already: index = int(order[ind]+0.5) - if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) - ix = index - (iy)*nx + if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): + continue + iy = int((index)/nx) + ix = index - (iy)*nx xx = xmn + (ix)*xsiz yy = ymn + (iy)*ysiz - current_node = (yy,xx) + current_node = (yy, xx) # print('Current_node'); print(current_node) # Now, we'll simulate the point ix,iy,iz. First, get the close data @@ -3499,29 +3729,33 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # simulated grid nodes: if sstrat == 0: -# print('searching for nearest data') + # print('searching for nearest data') na = -1 # accounting for 0 as first index if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist = np.zeros(1) + nums = np.zeros(1) + # use kd tree for fast nearest data search + dist[0], nums[0] = tree.query(current_node, ndmax) else: - dist, nums = tree.query(current_node,ndmax) + dist, nums = tree.query(current_node, ndmax) # remove any data outside search radius # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) - nums = nums[dist (UNEST+EPSLON): - simval = backtr_value(simval,vrtr,vrgtr,zmin,zmax,ltail,ltpar,utail,utpar) - if simval < zmin: simval = zmin - if simval > zmax: simval = zmax + simval = backtr_value( + simval, vrtr, vrgtr, zmin, zmax, ltail, ltpar, utail, utpar) + if simval < zmin: + simval = zmin + if simval > zmax: + simval = zmax sim[ind] = simval # print('simulated value = ' + str(sim[ind]) + ' at location index = ' + str(ind)) - av = av / max(ne,1.0) - ss =(ss / max(ne,1.0)) - av * av + av = av / max(ne, 1.0) + ss = (ss / max(ne, 1.0)) - av * av print('\n Realization ' + str(isim) + ': number = ' + str(ne)) - print(' mean = ' + str(round(av,4)) + ' (close to 0.0?)') - print(' variance = ' + str(round(ss,4)) + ' (close to gammabar(V,V)? approx. 1.0)') + print(' mean = ' + + str(round(av, 4)) + ' (close to 0.0?)') + print(' variance = ' + + str(round(ss, 4)) + ' (close to gammabar(V,V)? approx. 
1.0)')

 # END MAIN LOOP OVER SIMULATIONS:

-    sim_out = np.zeros((ny,nx))
-    for ind in range(0,nxy):
-        iy = int((ind)/nx)
-        ix = ind - (iy)*nx
-        sim_out[ny-iy-1,ix] = sim[ind]
+    sim_out = np.zeros((ny, nx))
+    for ind in range(0, nxy):
+        iy = int((ind)/nx)
+        ix = ind - (iy)*nx
+        sim_out[ny-iy-1, ix] = sim[ind]

     return sim_out

-def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,
-        ndmax,nodmax,mults,nmult,noct,radius,ktype,vario):
+def sisim(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, zmin, zmax, ltail, ltpar, middle, mpar, utail, utpar, nx, xmn, xsiz, ny, ymn, ysiz, seed, ndmin,
+          ndmax, nodmax, mults, nmult, noct, radius, ktype, vario):
     """A 2D version of GSLIB's SISIM Indicator Simulation program (Deutsch and Journel, 1998)
     converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at
     Austin (March, 2019).  WARNING: only tested for categorical ktype 0, 1 and 2 (locally variable proportion).
@@ -3653,40 +3895,42 @@
     EPSLON = 1.0e-20
     VERSION = 0.001
     np.random.seed(seed)
-    colocorr = 0.0 # no collocated cokriging
-    lvm = 0 # no kriging with a locally variable mean
-    sec = []; sec = np.array(sec) # no secondary data
-    ng = 0 # no tabulated values
+    colocorr = 0.0  # no collocated cokriging
+    lvm = 0  # no kriging with a locally variable mean
+    sec = []
+    sec = np.array(sec)  # no secondary data
+    ng = 0  # no tabulated values

 # Find the needed parameters:
     PMX = 9999.9
     MAXSAM = ndmax + 1
     MAXEQ = MAXSAM + 1
-    nxy = nx*ny
+    nxy = nx*ny
     mik = 0  # full indicator kriging
     use_trend = False
-    trend1d = np.zeros((nxy,1)) # no trend make a dummy trend
+    trend1d = np.zeros((nxy, 1))  # no trend; make a dummy trend
     if trend.shape[0] == nx and trend.shape[1] == ny and trend.shape[2] == ncut:
-        trend1d = np.zeros((nxy,ncut))
+        trend1d = np.zeros((nxy, ncut))
         use_trend = True
         index = 0
-        for iy in range(0,ny):
-            for ix in range(0,nx):
-                for ic in range(0,ncut):
-                    trend1d[index,ic] = trend[ny-iy-1,ix,ic] # copy trend
+        for iy in range(0, ny):
+            for ix in range(0, nx):
+                for ic in range(0, ncut):
+                    trend1d[index, ic] = trend[ny-iy-1, ix, ic]  # copy trend
                 index = index + 1

     MAXORD = nxy
     MAXNOD = nodmax
-    cnodeiv = np.zeros((ncut+1,MAXNOD))
+    cnodeiv = np.zeros((ncut+1, MAXNOD))
     tmp = np.zeros(MAXORD)
-    sstrat = 0 # search data and nodes by default, turned off if unconditional
-    sang1 = 0 # using isotropic search now
+    sstrat = 0  # search data and nodes by default, turned off if unconditional
+    sang1 = 0  # using isotropic search now
     sanis1 = 1.0

 # No covariance lookup table
-    mxctx = int(radius/xsiz)*2+1; mxcty = int(radius/xsiz)*2+1
+    mxctx = int(radius/xsiz)*2+1
+    mxcty = int(radius/xsiz)*2+1
 # print('cov table / spiral search nx, ny '); print(mxctx); print(mxcty)
     MAXCTX = mxctx
     MAXCTY = mxcty
@@ -3704,45 +3948,55 @@
     if nx > 1:
         MAXSBX = int(nx/2)
         if MAXSBX > 50:
-            MAXSBX=50
+            MAXSBX = 50

     MAXSBY = 1
     if ny > 1:
         MAXSBY = int(ny/2)
         if MAXSBY > 50:
-            MAXSBY=50
+            MAXSBY = 50

 # print('ncut'); print(ncut)

 # load the variogram
     MAXNST = 2
-    nst = np.zeros(ncut,dtype=int); c0 = np.zeros(ncut); cc = np.zeros((ncut,MAXNST))
-    aa = np.zeros((ncut,MAXNST),dtype=int); it = np.zeros((ncut,MAXNST),dtype=int)
-    ang = np.zeros((ncut,MAXNST)); anis = np.zeros((ncut,MAXNST))
+    nst = np.zeros(ncut, dtype=int)
+    c0 =
np.zeros(ncut) + cc = np.zeros((ncut, MAXNST)) + aa = np.zeros((ncut, MAXNST), dtype=int) + it = np.zeros((ncut, MAXNST), dtype=int) + ang = np.zeros((ncut, MAXNST)) + anis = np.zeros((ncut, MAXNST)) # print('varios - 1 vario'); print(vario[1]) - for icut in range(0,ncut): -# print('icut'); print(icut) + for icut in range(0, ncut): + # print('icut'); print(icut) nst[icut] = int(vario[icut]['nst']) - c0[icut] = vario[icut]['nug']; cc[icut,0] = vario[icut]['cc1']; it[icut,0] = vario[icut]['it1']; - ang[icut,0] = vario[icut]['azi1']; - aa[icut,0] = vario[icut]['hmaj1']; anis[icut,0] = vario[icut]['hmin1']/vario[icut]['hmaj1']; + c0[icut] = vario[icut]['nug'] + cc[icut, 0] = vario[icut]['cc1'] + it[icut, 0] = vario[icut]['it1'] + ang[icut, 0] = vario[icut]['azi1'] + aa[icut, 0] = vario[icut]['hmaj1'] + anis[icut, 0] = vario[icut]['hmin1']/vario[icut]['hmaj1'] if nst[icut] == 2: - cc[icut,1] = vario[icut]['cc2']; it[icut,1] = vario[icut]['it2']; ang[icut,1] = vario[icut]['azi2']; - aa[icut,1] = vario[icut]['hmaj2']; anis[icut,1] = vario[icut]['hmin2']/vario[icut]['hmaj2']; + cc[icut, 1] = vario[icut]['cc2'] + it[icut, 1] = vario[icut]['it2'] + ang[icut, 1] = vario[icut]['azi2'] + aa[icut, 1] = vario[icut]['hmaj2'] + anis[icut, 1] = vario[icut]['hmin2']/vario[icut]['hmaj2'] # print('check loaded cov model- icut '); print(icut) # print(cc[icut],aa[icut],it[icut],ang[icut],anis[icut]) - # Load the data - df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax + # trim values outside tmin and tmax + df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] MAXDAT = len(df_extract) nd = MAXDAT MAXCUT = ncut MAXNST = 2 - MAXROT = MAXNST*MAXCUT+ 1 - ikout = np.zeros((nx,ny,ncut)) + MAXROT = MAXNST*MAXCUT + 1 + ikout = np.zeros((nx, ny, ncut)) maxcov = np.zeros(ncut) # Allocate the needed memory: @@ -3755,8 +4009,8 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin rr = np.zeros(MAXEQ) s = np.zeros(MAXEQ) a = np.zeros(MAXEQ*MAXEQ) - ikmap = np.zeros((nx,ny,ncut)) - vr = np.zeros((MAXDAT,MAXCUT+1)) + ikmap = np.zeros((nx, ny, ncut)) + vr = np.zeros((MAXDAT, MAXCUT+1)) nviol = np.zeros(MAXCUT) aviol = np.zeros(MAXCUT) @@ -3764,13 +4018,13 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin ccdf = np.zeros(ncut) ccdfo = np.zeros(ncut) - ikout = np.zeros((nx,ny,ncut)) + ikout = np.zeros((nx, ny, ncut)) x = df_extract[xcol].values y = df_extract[ycol].values v = df_extract[vcol].values - MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used + MAXTAB = MAXDAT + MAXCUT # tabulated probabilities not used gcut = np.zeros(MAXTAB) # The indicator data are constructed knowing the thresholds and the @@ -3778,32 +4032,35 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('ncut'); print(ncut) if ivtype == 0: - for icut in range(0,ncut): - vr[:,icut] = np.where((v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + vr[:, icut] = np.where( + (v <= thresh[icut] + 0.5) & (v > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - vr[:,icut] = np.where(v <= thresh[icut], '1', '0') - vr[:,ncut] = v + for icut in range(0, ncut): + vr[:, icut] = np.where(v <= thresh[icut], '1', '0') + vr[:, ncut] = v # print('loaded data '); print(vr) # Make a KDTree for fast search of nearest neighbours - dp = list((y[i], x[i]) for i in range(0,MAXDAT)) - data_locs = np.column_stack((y,x)) - tree = sp.cKDTree(data_locs, leafsize=16, 
compact_nodes=True, copy_data=False, balanced_tree=True) + dp = list((y[i], x[i]) for i in range(0, MAXDAT)) + data_locs = np.column_stack((y, x)) + tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, + copy_data=False, balanced_tree=True) # Summary statistics of the input data - avg = vr[:,ncut].mean() - stdev = vr[:,ncut].std() + avg = vr[:, ncut].mean() + stdev = vr[:, ncut].std() ss = stdev**2.0 - vrmin = vr[:,ncut].min() - vrmax = vr[:,ncut].max() + vrmin = vr[:, ncut].min() + vrmax = vr[:, ncut].max() print('Data for IK3D: Variable column ' + str(vcol)) print(' Number = ' + str(MAXDAT)) ndh = MAXDAT - actloc = np.zeros(MAXDAT, dtype = int) # need to set up data at node locations - for i in range(1,MAXDAT): + # need to set up data at node locations + actloc = np.zeros(MAXDAT, dtype=int) + for i in range(1, MAXDAT): actloc[i] = i # Set up the rotation/anisotropy matrices that are needed for the @@ -3812,24 +4069,27 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin print('Setting up rotation matrices for variogram and search') radsqd = radius * radius rotmat = [] - for ic in range(0,ncut): - rotmat_temp, maxcov[ic] = setup_rotmat(c0[ic],int(nst[ic]),it[ic],cc[ic],ang[ic],9999.9) + for ic in range(0, ncut): + rotmat_temp, maxcov[ic] = setup_rotmat( + c0[ic], int(nst[ic]), it[ic], cc[ic], ang[ic], 9999.9) rotmat.append(rotmat_temp) - #return rotmat + # return rotmat # Set up the covariance table and the spiral search based just on the first variogram # This is ok as we are not using the covariance look up table, just spiral search for previous nodes - isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here + isrot = MAXNST*MAXCUT + 1 # note I removed anisotropic search here # print('ang[0]'); print(ang[0]) if nst[0] == 1: - global_rotmat = setrot(ang[0,0],ang[0,0],sang1,anis[0,0],anis[0,0],sanis1,nst[0],MAXROT=2) + global_rotmat = setrot( + ang[0, 0], ang[0, 0], sang1, anis[0, 0], anis[0, 0], sanis1, nst[0], MAXROT=2) else: - global_rotmat = setrot(ang[0,0],ang[1,0],sang1,anis[0,0],anis[1,0],sanis1,nst[0],MAXROT=2) + global_rotmat = setrot( + ang[0, 0], ang[1, 0], sang1, anis[0, 0], anis[1, 0], sanis1, nst[0], MAXROT=2) - cov_table,tmp2,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, - xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) + cov_table, tmp2, order, ixnode, iynode, nlooku, nctx, ncty = ctable(MAXNOD, MAXCXY, MAXCTX, MAXCTY, MXY, + xsiz, ysiz, isrot, nx, ny, nst[0], c0[0], cc[0], aa[0], it[0], ang[0], anis[0], global_rotmat, radsqd) # print('spiral search number nodes '); print(nlooku) @@ -3838,21 +4098,21 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin nk = 0 xk = 0.0 vk = 0.0 - for icut in range (0,ncut): - nviol[icut] = 0 - aviol[icut] = 0.0 + for icut in range(0, ncut): + nviol[icut] = 0 + aviol[icut] = 0.0 xviol[icut] = -1.0 # print('Working on the kriging') # Report on progress from time to time: if koption == 0: - nxy = nx*ny + nxy = nx*ny nloop = nxy - irepo = max(1,min((nxy/10),10000)) + irepo = max(1, min((nxy/10), 10000)) else: nloop = 10000000 - irepo = max(1,min((nd/10),10000)) + irepo = max(1, min((nd/10), 10000)) ddh = 0.0 # MAIN LOOP OVER ALL THE SIMULAUTIONS: @@ -3862,7 +4122,7 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): + for ixy in range(0, nxy): order[ixy] = ind ind = ind + 1 
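# The random-path idiom used here (and in sgsim above): draw a uniform
# random key per node, then co-sort the node indices by that key, which
# yields a random visiting permutation; the dsortem call in the next
# hunk performs the co-sort. A numpy equivalent of the same idea
# (a sketch, not part of the patch):
#
#     >>> import numpy as np
#     >>> np.random.seed(73073)
#     >>> order = np.argsort(np.random.rand(6))  # random visit sequence
#     >>> sorted(order.tolist()) == [0, 1, 2, 3, 4, 5]
#     True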
@@ -3870,22 +4130,24 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin
 # somewhat arbitrary):

     if mults == 1:
-        for imult in range(0,nmult):
-            nny = int(max(1,ny/((imult+1)*4)))
-            nnx = int(max(1,nx/((imult+1)*4)))
+        for imult in range(0, nmult):
+            nny = int(max(1, ny/((imult+1)*4)))
+            nnx = int(max(1, nx/((imult+1)*4)))
 # print('multi grid - nnx, nny'); print(nnx,nny)
-            jy = 1
-            jx = 1
-            for iy in range(0,nny):
-                if nny > 0: jy = iy*(imult+1)*4
-                for ix in range(0,nnx):
-                    if nnx > 0: jx = ix*(imult+1)*4
+            jy = 1
+            jx = 1
+            for iy in range(0, nny):
+                if nny > 0:
+                    jy = iy*(imult+1)*4
+                for ix in range(0, nnx):
+                    if nnx > 0:
+                        jx = ix*(imult+1)*4
                     index = jx + (jy-1)*nx
                     sim[index] = sim[index] - (imult+1)

 # Initialize the simulation:
-    sim, order = dsortem(0,nxy,sim,2,b=order)
+    sim, order = dsortem(0, nxy, sim, 2, b=order)
     sim.fill(UNEST)
     tmp.fill(0.0)
     print('Working on a single realization, seed ' + str(seed))
@@ -3894,13 +4156,13 @@

 # Assign the data to the closest grid node:
     TINY = 0.0001
-    for idd in range(0,nd):
-#        print('data'); print(x[idd],y[idd])
-        ix = getindex(nx,xmn,xsiz,x[idd])
-        iy = getindex(ny,ymn,ysiz,y[idd])
+    for idd in range(0, nd):
+        # print('data'); print(x[idd],y[idd])
+        ix = getindex(nx, xmn, xsiz, x[idd])
+        iy = getindex(ny, ymn, ysiz, y[idd])
         ind = ix + (iy-1)*nx
-        xx = xmn + (ix)*xsiz
-        yy = ymn + (iy)*ysiz
+        xx = xmn + (ix)*xsiz
+        yy = ymn + (iy)*ysiz
 # print('xx, yy' + str(xx) + ',' + str(yy))
         test = abs(xx-x[idd]) + abs(yy-y[idd])
@@ -3918,29 +4180,30 @@

 # Now, enter the data values into the simulated grid:

-    for ind in range(0,nxy):
+    for ind in range(0, nxy):
         idd = int(sim[ind]+0.5)
         if idd > 0:
             sim[ind] = vr[idd]
         else:
             tmp[ind] = sim[ind]
             sim[ind] = UNEST
-    irepo = max(1,min((nxy/10),10000))
+    irepo = max(1, min((nxy/10), 10000))

 # LOOP OVER ALL THE NODES:
-    for ind in range(0,nxy):
+    for ind in range(0, nxy):
         if (int(ind/irepo)*irepo) == ind:
             print('   currently on node ' + str(ind))

 # Find the index on the random path, check if assigned data and get location
         index = int(order[ind]+0.5)
-        if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue
-        iy = int((index)/nx)
-        ix = index - (iy)*nx
+        if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)):
+            continue
+        iy = int((index)/nx)
+        ix = index - (iy)*nx
         xx = xmn + (ix)*xsiz
         yy = ymn + (iy)*ysiz
-        current_node = (yy,xx)
+        current_node = (yy, xx)
 # print('Current_node'); print(current_node)

 # Now we'll simulate the point ix,iy,iz.
First, get the close data @@ -3949,22 +4212,25 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # simulated grid nodes: if sstrat == 0: -# print('searching for nearest data') + # print('searching for nearest data') na = -1 # accounting for 0 as first index if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + dist = np.zeros(1) + nums = np.zeros(1) + # use kd tree for fast nearest data search + dist[0], nums[0] = tree.query(current_node, ndmax) else: - dist, nums = tree.query(current_node,ndmax) + dist, nums = tree.query(current_node, ndmax) # remove any data outside search radius # print('nums'); print(nums) # print('dist'); print(dist) na = len(dist) - nums = nums[dist 0: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') + for icut in range(0, ncut): + cnodeiv[icut, :] = np.where( + (cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') else: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') - cnodeiv[ncut,:] = cnodev + for icut in range(0, ncut): + cnodeiv[icut, :] = np.where(cnodev <= thresh[icut], '1', '0') + cnodeiv[ncut, :] = cnodev # print('indicator transformed nearest nodes'); print(cnodeiv) @@ -3994,53 +4262,54 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # print('nums'); print(nums) # What cdf value are we looking for? - zval = UNEST + zval = UNEST cdfval = np.random.rand() # Use the global distribution? # check inputs # print('nst'); print(nst) - - if nclose + ncnode <= 0: -# print('nclose & ncnode'); print(nclose, ncnode) - zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + # print('nclose & ncnode'); print(nclose, ncnode) + zval = beyond(ivtype, ncut, thresh, gcdf, ng, gcut, gcdf, zmin, + zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval) else: -# print('kriging') -# Estimate the local distribution by indicator kriging: -# print('maxcov'); print(maxcov) - for ic in range(0,ncut): -# print('check kriging cov model- icut '); print(ic) -# print('node data values for kriging'); print(cnodev) -# print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) - #ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) + # print('kriging') + # Estimate the local distribution by indicator kriging: + # print('maxcov'); print(maxcov) + for ic in range(0, ncut): + # print('check kriging cov model- icut '); print(ic) + # print('node data values for kriging'); print(cnodev) + # print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) + # ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) if ktype == 0: gmean = gcdf[ic] elif ktype == 2: - gmean = trend1d[index,ic] + gmean = trend1d[index, ic] else: - gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used + gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used # print('gmean'); print(gmean) - ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, - icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], - rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) + ccdf[ic], cstdev = ikrige(ix, iy, 
 
 #                print('ccdf'); print(ccdf)
 
 # Correct order relations:
-            ccdfo = ordrel(ivtype,ncut,ccdf)
+            ccdfo = ordrel(ivtype, ncut, ccdf)
 
 # Draw from the local distribution:
-            zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval)
+            zval = beyond(ivtype, ncut, thresh, ccdfo, ng, gcut, gcdf, zmin,
+                          zmax, ltail, ltpar, middle, mpar, utail, utpar, zval, cdfval)
         sim[index] = zval
 #        print('zval'); print(zval)
 
 # END MAIN LOOP OVER SIMULATIONS:
-    sim_out = np.zeros((ny,nx))
-    for ind in range(0,nxy):
-        iy = int((ind)/nx)
-        ix = ind - (iy)*nx
-        sim_out[ny-iy-1,ix] = sim[ind]
+    sim_out = np.zeros((ny, nx))
+    for ind in range(0, nxy):
+        iy = int(ind/nx)
+        ix = ind - iy*nx
+        sim_out[ny-iy-1, ix] = sim[ind]
 
     return sim_out
 
@@ -4095,15 +4364,24 @@ def kb2d_locations(
 
 # load the variogram
     nst = vario['nst']
-    cc = np.zeros(nst); aa = np.zeros(nst); it = np.zeros(nst)
-    ang = np.zeros(nst); anis = np.zeros(nst)
+    cc = np.zeros(nst)
+    aa = np.zeros(nst)
+    it = np.zeros(nst)
+    ang = np.zeros(nst)
+    anis = np.zeros(nst)
 
-    c0 = vario['nug'];
-    cc[0] = vario['cc1']; it[0] = vario['it1']; ang[0] = vario['azi1'];
-    aa[0] = vario['hmaj1']; anis[0] = vario['hmin1']/vario['hmaj1'];
+    c0 = vario['nug']
+    cc[0] = vario['cc1']
+    it[0] = vario['it1']
+    ang[0] = vario['azi1']
+    aa[0] = vario['hmaj1']
+    anis[0] = vario['hmin1']/vario['hmaj1']
     if nst == 2:
-        cc[1] = vario['cc2']; it[1] = vario['it2']; ang[1] = vario['azi2'];
-        aa[1] = vario['hmaj2']; anis[1] = vario['hmin2']/vario['hmaj2'];
+        cc[1] = vario['cc2']
+        it[1] = vario['it2']
+        ang[1] = vario['azi2']
+        aa[1] = vario['hmaj2']
+        anis[1] = vario['hmin2']/vario['hmaj2']
 
 # Allocate the needed memory:
     xa = np.zeros(MAXSAM)
@@ -4119,9 +4397,10 @@ def kb2d_locations(
     vlist = np.zeros(len(df_loc))
 
 # Load the data
-    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax
+    # trim values outside tmin and tmax
+    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)]
     nd = len(df_extract)
-    ndmax = min(ndmax,nd)
+    ndmax = min(ndmax, nd)
     x = df_extract[xcol].values
    y = df_extract[ycol].values
     vr = df_extract[vcol].values
@@ -4133,9 +4412,10 @@ def kb2d_locations(
     vr_loc = df_loc[vcol].values
 
 # Make a KDTree for fast search of nearest neighbours
-    dp = list((y[i], x[i]) for i in range(0,nd))
-    data_locs = np.column_stack((y,x))
-    tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True, copy_data=False, balanced_tree=True)
+    dp = list((y[i], x[i]) for i in range(0, nd))
+    data_locs = np.column_stack((y, x))
+    tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True,
+                      copy_data=False, balanced_tree=True)
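A minimal standalone sketch of the cKDTree-plus-radius-mask idiom that these hunks keep reformatting (it recurs in sisim, kb2d, and kb2d_locations). This is illustrative only; the coordinates, radius, and ndmax below are made up, not taken from the patch:

    import numpy as np
    import scipy.spatial as sp

    rng = np.random.default_rng(73073)
    x = rng.uniform(0.0, 1000.0, 100)   # hypothetical data x coordinates
    y = rng.uniform(0.0, 1000.0, 100)   # hypothetical data y coordinates
    radius, ndmax = 100.0, 10           # isotropic search radius, max data

    # build the tree once over (y, x) pairs, matching the column order above
    tree = sp.cKDTree(np.column_stack((y, x)))

    # nearest ndmax samples around one estimation location (yloc, xloc)
    dist, nums = tree.query((500.0, 500.0), ndmax)

    # mask out samples beyond the search radius, then count what survives;
    # counting before masking gives the stale na that PATCH 10 deletes
    nums = nums[dist < radius]
    dist = dist[dist < radius]
    na = len(dist)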
 
 # Summary statistics for the data after trimming
     avg = vr.mean()
     stdev = vr.std()
     ss = stdev**2.0
     vrmin = vr.min()
     vrmax = vr.max()
 
 # Initialize accumulators:
-    cbb  = 0.0
+    cbb = 0.0
     rad2 = radius*radius
 
 # Calculate Block Covariance. Check for point kriging.
-    rotmat, maxcov = setup_rotmat(c0,nst,it,cc,ang,PMX)
-    cov = cova2(0.0,0.0,0.0,0.0,nst,c0,PMX,cc,aa,it,ang,anis,rotmat,maxcov)
+    rotmat, maxcov = setup_rotmat(c0, nst, it, cc, ang, PMX)
+    cov = cova2(0.0, 0.0, 0.0, 0.0, nst, c0, PMX, cc,
+                aa, it, ang, anis, rotmat, maxcov)
 
 # Keep this value to use for the unbiasedness constraint:
     unbias = cov
     cbb = cov
-    first  = False
+    first = False
 
 # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID:
     nk = 0
@@ -4165,65 +4446,70 @@ def kb2d_locations(
         print('Working on location ' + str(idata))
         xloc = x_loc[idata]
         yloc = y_loc[idata]
-        current_node = (yloc,xloc)
+        current_node = (yloc, xloc)
 
 # Find the nearest samples within each octant: First initialize
 # the counter arrays:
         na = -1  # accounting for 0 as first index
         dist.fill(1.0e+20)
         nums.fill(-1)
-        dist, nums = tree.query(current_node,ndmax) # use kd tree for fast nearest data search
+        # use kd tree for fast nearest data search
+        dist, nums = tree.query(current_node, ndmax)
 # remove any data outside search radius
         na = len(dist)
-        nums = nums[dist<radius]
-        dist = dist[dist<radius]
+        nums = nums[dist < radius]
+        dist = dist[dist < radius]
+        na = len(dist)

From: David Fulford
Date: Fri, 1 Nov 2019 18:36:42 -0500
Subject: [PATCH 07/10] remove parallelization

---
 geostatspy/geostats.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py
index 02f006d..8d35d62 100644
--- a/geostatspy/geostats.py
+++ b/geostatspy/geostats.py
@@ -1879,7 +1879,7 @@ def gamv(df, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh,
          isill)
 
 
-@jit(**JITKW, parallel=True)
+@jit(**JITKW, parallel=False)
 def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill):
 
     # Summary statistics for the data after trimming
@@ -1910,7 +1910,7 @@ def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, at
     return dis, vario, npp
 
 
-@jit(**JITKW, parallel=True)
+@jit(**JITKW, parallel=False)
 def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh):
     """Calculate the variogram by looping over combinations of data pairs.
     :param x: x values
@@ -2676,7 +2676,7 @@ def kb2d(
 _weights = None
 
 
-@jit(**JITKW, parallel=True)
+@jit(**JITKW, parallel=False)  # numba crashed on parallel=True
 def _kb2d_jit(
     tree, nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin,
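To make PATCH 07's toggle concrete: numba's parallel=True asks for automatic loop parallelization, usually together with an explicit prange loop, and parallel=False is the standard retreat when that compilation path misbehaves. A minimal sketch of the knob (the row_means function is a toy invented here, not code from these patches):

    import numpy as np
    from numba import jit, prange

    @jit(nopython=True, parallel=False)  # flip to True to request threading
    def row_means(a):
        out = np.empty(a.shape[0])
        for i in prange(a.shape[0]):  # prange degrades to plain range when serial
            out[i] = a[i, :].mean()
        return out

    print(row_means(np.arange(12.0).reshape(3, 4)))  # [1.5 5.5 9.5]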
From 3aeae349cebf89bb48542b469d15ef803dba9af9 Mon Sep 17 00:00:00 2001
From: David Fulford
Date: Tue, 21 Jan 2020 10:30:33 -0600
Subject: [PATCH 08/10] some cleanup of testing code

---
 geostatspy/geostats.py | 220 ++++++++++++++++++++----------------------
 1 file changed, 100 insertions(+), 120 deletions(-)

diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py
index 8d35d62..971a20d 100644
--- a/geostatspy/geostats.py
+++ b/geostatspy/geostats.py
@@ -2326,11 +2326,6 @@ def nscore(
     return ns, vr, wt_ns
 
 
-_nums = []
-_dists = []
-_weights = []
-
-
 def kb2d(
     df,
     xcol,
@@ -2448,12 +2443,6 @@ def kb2d(
     vrmin = vr.min()
     vrmax = vr.max()
 
-    global _nums
-    global _dists
-    global _weights
-    _nums = []
-    _dists = []
-    _weights = []
 
 # Set up the discretization points per block. Figure out how many
 # are needed, the spacing, and fill the xdb and ydb arrays with the
@@ -2655,10 +2644,6 @@ def kb2d(
             ak = ak + est
             vk = vk + est*est
 
-            if xloc == 21 and yloc == 102:
-                _nums.append(nums.copy())
-                _dists.append(dist.copy())
-                _weights.append(s)
 
 # END OF MAIN LOOP OVER ALL THE BLOCKS:
 
@@ -2671,9 +2656,106 @@ def kb2d(
 
     return kmap, vmap
 
 
-_nums = None
-_dists = None
-_weights = None
+def kb2d_jit(
+    df,
+    xcol,
+    ycol,
+    vcol,
+    tmin,
+    tmax,
+    nx,
+    xmn,
+    xsiz,
+    ny,
+    ymn,
+    ysiz,
+    nxdis,
+    nydis,
+    ndmin,
+    ndmax,
+    radius,
+    ktype,
+    skmean,
+    vario,
+):
+    """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the
+    original Fortran to Python by Michael Pyrcz, the University of Texas at
+    Austin (Jan, 2019).
+    :param df: pandas DataFrame with the spatial data
+    :param xcol: name of the x coordinate column
+    :param ycol: name of the y coordinate column
+    :param vcol: name of the property column
+    :param tmin: property trimming limit
+    :param tmax: property trimming limit
+    :param nx: definition of the grid system (x axis)
+    :param xmn: definition of the grid system (x axis)
+    :param xsiz: definition of the grid system (x axis)
+    :param ny: definition of the grid system (y axis)
+    :param ymn: definition of the grid system (y axis)
+    :param ysiz: definition of the grid system (y axis)
+    :param nxdis: number of discretization points for a block
+    :param nydis: number of discretization points for a block
+    :param ndmin: minimum number of data points to use for kriging a block
+    :param ndmax: maximum number of data points to use for kriging a block
+    :param radius: maximum isotropic search radius
+    :param ktype: kriging type (0 = simple kriging, 1 = ordinary kriging)
+    :param skmean: mean used for simple kriging
+    :param vario: variogram dictionary (nst, nug, cc1, it1, azi1, hmaj1, hmin1, ...)
+    :return: kriged estimate and kriging variance grids
+    """
+# Load the data
+    # trim values outside tmin and tmax
+    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)]
+    nd = len(df_extract)
+    x = df_extract[xcol].values
+    y = df_extract[ycol].values
+    vr = df_extract[vcol].values
+
+# Make a KDTree for fast search of nearest neighbours
+    # dp = list((y[i], x[i]) for i in range(0,nd))
+    data_locs = np.column_stack((y, x))
+    _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True,
+                       copy_data=False, balanced_tree=True)
+
+    vario_int = Dict.empty(
+        key_type=nb.types.unicode_type,
+        value_type=nb.types.i8
+    )
+
+    vario_float = Dict.empty(
+        key_type=nb.types.unicode_type,
+        value_type=nb.f8
+    )
+
+    vario = dict(vario)  # shallow copy so the caller's dict is not mutated
+
+    vario_float['nug'] = vario.pop('nug')
+    vario_float['cc1'] = vario.pop('cc1')
+    vario_float['cc2'] = vario.pop('cc2')
+
+    for key in vario.keys():
+        vario_int[key] = vario[key]
+
+    tree = Dict.empty(
+        key_type=nb.types.Tuple((nb.f8, nb.f8)),
+        value_type=nb.types.Tuple((nb.f8[:], nb.i4[:]))
+    )
+
+    for iy in range(0, ny):
+        yloc = ymn + (iy-0)*ysiz
+        for ix in range(0, nx):
+            xloc = xmn + (ix-0)*xsiz
+            current_node = (yloc, xloc)
+            tree[current_node] = _tree.query(
+                current_node, ndmax, distance_upper_bound=radius)
+
+    kmap, vmap = _kb2d_jit(
+        tree, nd, x, y, vr,
+        xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin,
+        ndmax, radius, ktype, skmean, vario_int, vario_float)
+
+    return kmap, vmap
 
 
 @jit(**JITKW, parallel=False)  # numba crashed on parallel=True
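The typed-Dict split in the new kb2d_jit above deserves a standalone note: numba's typed Dict is homogeneously typed, so a plain Python dict mixing ints and floats cannot cross the nopython boundary as a single object; the patch therefore splits the variogram parameters by value type. A sketch of the idea (toy values; the real code pops 'nug', 'cc1', and 'cc2' explicitly, and any other float-valued keys, such as azimuths, would need the same treatment):

    import numba as nb
    from numba.typed import Dict

    vario = {'nst': 1, 'it1': 1, 'nug': 0.0, 'cc1': 1.0, 'cc2': 0.0}  # toy values

    vario_int = Dict.empty(key_type=nb.types.unicode_type, value_type=nb.types.i8)
    vario_float = Dict.empty(key_type=nb.types.unicode_type, value_type=nb.f8)

    for key, val in vario.items():
        # route each entry to the typed Dict whose value type matches
        if isinstance(val, float):
            vario_float[key] = val
        else:
            vario_int[key] = val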
@@ -2949,108 +3031,6 @@ def _kb2d_jit(
     return kmap, vmap
 
 
-def kb2d_jit(
-    df,
-    xcol,
-    ycol,
-    vcol,
-    tmin,
-    tmax,
-    nx,
-    xmn,
-    xsiz,
-    ny,
-    ymn,
-    ysiz,
-    nxdis,
-    nydis,
-    ndmin,
-    ndmax,
-    radius,
-    ktype,
-    skmean,
-    vario,
-):
-    """GSLIB's KB2D program (Deutsch and Journel, 1998) converted from the
-    original Fortran to Python by Michael Pyrcz, the University of Texas at
-    Austin (Jan, 2019).
-    :param df: pandas DataFrame with the spatial data
-    :param xcol: name of the x coordinate column
-    :param ycol: name of the y coordinate column
-    :param vcol: name of the property column
-    :param tmin: property trimming limit
-    :param tmax: property trimming limit
-    :param nx: definition of the grid system (x axis)
-    :param xmn: definition of the grid system (x axis)
-    :param xsiz: definition of the grid system (x axis)
-    :param ny: definition of the grid system (y axis)
-    :param ymn: definition of the grid system (y axis)
-    :param ysiz: definition of the grid system (y axis)
-    :param nxdis: number of discretization points for a block
-    :param nydis: number of discretization points for a block
-    :param ndmin: minimum number of data points to use for kriging a block
-    :param ndmax: maximum number of data points to use for kriging a block
-    :param radius: maximum isotropic search radius
-    :param ktype:
-    :param skmean:
-    :param vario:
-    :return:
-    """
-# Load the data
-    # trim values outside tmin and tmax
-    df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)]
-    nd = len(df_extract)
-    x = df_extract[xcol].values
-    y = df_extract[ycol].values
-    vr = df_extract[vcol].values
-
-# Make a KDTree for fast search of nearest neighbours
-    # dp = list((y[i], x[i]) for i in range(0,nd))
-    data_locs = np.column_stack((y, x))
-    import scipy.spatial as sp
-    _tree = sp.cKDTree(data_locs, leafsize=16, compact_nodes=True,
-                       copy_data=False, balanced_tree=True)
-
-    vario_int = Dict.empty(
-        key_type=nb.types.unicode_type,
-        value_type=nb.types.i8
-    )
-
-    vario_float = Dict.empty(
-        key_type=nb.types.unicode_type,
-        value_type=nb.f8
-    )
-
-    vario = copy(vario)
-
-    vario_float['nug'] = vario.pop('nug')
-    vario_float['cc1'] = vario.pop('cc1')
-    vario_float['cc2'] = vario.pop('cc2')
-
-    for key in vario.keys():
-        vario_int[key] = vario[key]
-
-    tree = Dict.empty(
-        key_type=nb.types.Tuple((nb.f8, nb.f8)),
-        value_type=nb.types.Tuple((nb.f8[:], nb.i4[:]))
-    )
-
-    for iy in range(0, ny):
-        yloc = ymn + (iy-0)*ysiz
-        for ix in range(0, nx):
-            xloc = xmn + (ix-0)*xsiz
-            current_node = (yloc, xloc)
-            tree[current_node] = _tree.query(
-                current_node, ndmax, distance_upper_bound=radius)
-
-    kmap, vmap = _kb2d_jit(
-        tree, nd, x, y, vr,
-        xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin,
-        ndmax, radius, ktype, skmean, vario_int, vario_float)
-
-    return kmap, vmap
-
-
 def ik2d(df, xcol, ycol, vcol, ivtype, koption, ncut, thresh, gcdf, trend, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, ndmin, ndmax, radius, ktype, vario):
     """A 2D version of GSLIB's IK3D Indicator Kriging program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at

From a5e78b5e2143fdb341f6d06766b967730b11a608 Mon Sep 17 00:00:00 2001
From: David Fulford
Date: Tue, 21 Jan 2020 10:43:11 -0600
Subject: [PATCH 09/10] add parallel=True/False as kwargs

---
 geostatspy/geostats.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py
index 971a20d..3e8cb95 100644
--- a/geostatspy/geostats.py
+++ b/geostatspy/geostats.py
@@ -28,6 +28,7 @@ from statsmodels.stats.weightstats import DescrStatsW
 
 JITKW = dict(nopython=True, cache=True, fastmath=True)
+JITPL = dict(parallel=False)
 
 
 def backtr(df, vcol, vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar):
@@ -1879,7 +1880,7 @@ def gamv(df, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh,
          isill)
 
 
-@jit(**JITKW, parallel=False)
+@jit(**JITKW, **JITPL)
 def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, atol, bandwh, isill):
 
     # Summary statistics for the data after trimming
@@ -1910,7 +1911,7 @@ def _gamv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, xlag, xltol, nlag, azm, at
     return dis, vario, npp
 
 
-@jit(**JITKW, parallel=False)
+@jit(**JITKW, **JITPL)
 def variogram_loop(x, y, vr, xlag, xltol, nlag, azm, atol, bandwh):
     """Calculate the variogram by looping over combinations of data pairs.
     :param x: x values
@@ -2096,7 +2097,7 @@ def varmapv(df, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp,
           isill)
 
 
-@jit(**JITKW, parallel=False)  # runs faster not in parallel
+@jit(**JITKW, **JITPL)  # runs faster when not parallel
 def _varmapv(nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nxlag, nylag, dxlag, dylag, minnp, isill):
     """Calculate the variogram map from irregularly spaced data.
     :param df: DataFrame with the spatial data; xcol, ycol, vcol name the coordinate and property columns
@@ -2758,7 +2759,7 @@ def kb2d_jit(
     return kmap, vmap
 
 
-@jit(**JITKW, parallel=False)  # numba crashed on parallel=True
+@jit(**JITKW)  # numba crashed on parallel=True
 def _kb2d_jit(
     tree, nd, x, y, vr, xcol, ycol, vcol, tmin, tmax, nx, xmn, xsiz, ny, ymn, ysiz, nxdis, nydis, ndmin,

From 2917666cd7c77e3fabf858c6a36dc89995c28664 Mon Sep 17 00:00:00 2001
From: David Fulford
Date: Tue, 21 Jan 2020 12:42:35 -0600
Subject: [PATCH 10/10] some cleanup

---
 geostatspy/geostats.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py
index 3e8cb95..9151068 100644
--- a/geostatspy/geostats.py
+++ b/geostatspy/geostats.py
@@ -2887,7 +2887,6 @@ def _kb2d_jit(
 #        dist, nums = tree.query(current_node,ndmax, distance_upper_bound=radius)  # use kd tree for fast nearest data search
         dist, nums = tree[current_node]
 # remove any data outside search radius
-        na = len(dist)
         nums = nums[dist < radius]
         dist = dist[dist < radius]
         na = len(dist)
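A closing illustration of the kwargs-dict pattern that PATCH 09 settles on: collecting the @jit options in module-level dicts lets every decorated kernel share one compilation policy, and toggling parallelization becomes a one-line change. A sketch under those assumptions (the weighted_sum function is invented for illustration, not part of the library):

    import numpy as np
    from numba import jit

    JITKW = dict(nopython=True, cache=True, fastmath=True)
    JITPL = dict(parallel=False)  # single switch for every kernel below

    @jit(**JITKW, **JITPL)
    def weighted_sum(values, weights):
        total = 0.0
        for i in range(values.shape[0]):
            total += values[i] * weights[i]
        return total

    v = np.array([1.0, 2.0, 3.0])
    w = np.array([0.5, 0.25, 0.25])
    print(weighted_sum(v, w))  # 1.75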