-
Notifications
You must be signed in to change notification settings - Fork 116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New options: Percent coverage selection and weighting #136
base: master
Are you sure you want to change the base?
Changes from 1 commit
34dff52
ba627d7
6725da1
4ffe2ad
c0d9bc3
a03eb04
795d86e
a9a4a3c
cfa198a
7000632
b52a368
85f62a1
76c8667
cb87d40
644ddc3
143a7cf
7a59a52
41222f9
6955476
1c39067
2fe6d66
d986437
46fcebc
fc558c2
c475b58
a52055f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,8 @@ | |
from shapely.geometry import shape | ||
from .io import read_features, Raster | ||
from .utils import (rasterize_geom, get_percentile, check_stats, | ||
remap_categories, key_assoc_val, boxify_points) | ||
remap_categories, key_assoc_val, boxify_points, | ||
rasterize_pctcover_geom) | ||
|
||
|
||
def raster_stats(*args, **kwargs): | ||
|
@@ -36,6 +37,9 @@ def gen_zonal_stats( | |
affine=None, | ||
stats=None, | ||
all_touched=False, | ||
percent_cover_selection=None, | ||
percent_cover_weighting=False, | ||
percent_cover_scale=None, | ||
categorical=False, | ||
category_map=None, | ||
add_stats=None, | ||
|
@@ -80,6 +84,29 @@ def gen_zonal_stats( | |
those having a center point within the polygon. | ||
defaults to `False` | ||
|
||
percent_cover_selection: float, optional | ||
Include only raster cells that have at least the given percent | ||
covered by the vector feature. Requires percent_cover_scale argument | ||
be used to specify scale at which to generate percent coverage | ||
estimates | ||
|
||
percent_cover_weighting: bool, optional | ||
whether or not to use percent coverage of cells during calculations | ||
to adjust stats (only applies to mean, count and sum) | ||
|
||
percent_cover_scale: int, optional | ||
Scale used when generating percent coverage estimates of each | ||
raster cell by vector feature. Percent coverage is generated by | ||
rasterizing the feature at a finer resolution than the raster | ||
(based on percent_cover_scale value) then using a summation to aggregate | ||
to the raster resolution and dividing by the square of percent_cover_scale | ||
to get percent coverage value for each cell. Increasing percent_cover_scale | ||
will increase the accuracy of percent coverage values; three orders | ||
magnitude finer resolution (percent_cover_scale=1000) is usually enough to | ||
get coverage estimates with <1% error in individual edge cells coverage | ||
estimates, though much smaller values (e.g., percent_cover_scale=10) are often | ||
sufficient (<10% error) and require less memory. | ||
|
||
categorical: bool, optional | ||
|
||
category_map: dict | ||
|
@@ -139,20 +166,71 @@ def gen_zonal_stats( | |
warnings.warn("Use `band` to specify band number", DeprecationWarning) | ||
band = band_num | ||
|
||
# check inputs related to percent coverage | ||
percent_cover = False | ||
if percent_cover_weighting or percent_cover_selection is not None: | ||
percent_cover = True | ||
if percent_cover_scale is None: | ||
warnings.warn('No value for `percent_cover_scale` was given. ' | ||
'Using default value of 10.') | ||
percent_cover_scale = 10 | ||
|
||
try: | ||
if percent_cover_scale != int(percent_cover_scale): | ||
warnings.warn('Value for `percent_cover_scale` given ({0}) ' | ||
'was converted to int ({1}) but does not ' | ||
'match original value'.format( | ||
percent_cover_scale, int(percent_cover_scale))) | ||
|
||
percent_cover_scale = int(percent_cover_scale) | ||
|
||
if percent_cover_scale <= 1: | ||
raise Exception('Value for `percent_cover_scale` must be ' | ||
'greater than one ({0})'.format( | ||
percent_cover_scale)) | ||
|
||
except: | ||
raise Exception('Invalid value for `percent_cover_scale` ' | ||
'provided ({0}). Must be type int.'.format( | ||
percent_cover_scale)) | ||
|
||
if percent_cover_selection is not None: | ||
try: | ||
percent_cover_selection = float(percent_cover_selection) | ||
except: | ||
raise Exception('Invalid value for `percent_cover_selection` ' | ||
'provided ({0}). Must be able to be converted ' | ||
'to a float.'.format(percent_cover_selection)) | ||
|
||
if not all_touched: | ||
warnings.warn('`all_touched` was not enabled but an option requiring ' | ||
'percent_cover calculations was selected. Automatically ' | ||
'enabling `all_touched`.') | ||
all_touched = True | ||
|
||
|
||
with Raster(raster, affine, nodata, band) as rast: | ||
features_iter = read_features(vectors, layer) | ||
for _, feat in enumerate(features_iter): | ||
geom = shape(feat['geometry']) | ||
|
||
if 'Point' in geom.type: | ||
geom = boxify_points(geom, rast) | ||
percent_cover = False | ||
|
||
geom_bounds = tuple(geom.bounds) | ||
|
||
fsrc = rast.read(bounds=geom_bounds) | ||
|
||
# rasterized geometry | ||
rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) | ||
if percent_cover: | ||
rv_array = rasterize_pctcover_geom( | ||
geom, shape=fsrc.shape, affine=fsrc.affine, | ||
scale=percent_cover_scale) | ||
else: | ||
rv_array = rasterize_geom( | ||
geom, shape=fsrc.shape, affine=fsrc.affine, | ||
all_touched=all_touched) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks like it is asking for trouble - these shouldn't be labelled the same thing, as they have different meanings and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed, will change that |
||
|
||
# nodata mask | ||
isnodata = (fsrc.array == fsrc.nodata) | ||
|
@@ -164,9 +242,14 @@ def gen_zonal_stats( | |
|
||
# Mask the source data array | ||
# mask everything that is not a valid value or not within our geom | ||
masked = np.ma.MaskedArray( | ||
fsrc.array, | ||
mask=(isnodata | ~rv_array)) | ||
if percent_cover_selection is not None: | ||
masked = np.ma.MaskedArray( | ||
fsrc.array, | ||
mask=(isnodata | ~rv_array | percent_cover > percent_cover_selection)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems to be wrong - There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right. that should be |
||
else: | ||
masked = np.ma.MaskedArray( | ||
fsrc.array, | ||
mask=(isnodata | ~rv_array)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will raise an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch. think i can switch it to |
||
|
||
# execute zone_func on masked zone ndarray | ||
if zone_func is not None: | ||
|
@@ -187,7 +270,6 @@ def gen_zonal_stats( | |
pixel_count = dict(zip([np.asscalar(k) for k in keys], | ||
[np.asscalar(c) for c in counts])) | ||
|
||
|
||
if categorical: | ||
feature_stats = dict(pixel_count) | ||
if category_map: | ||
|
@@ -200,12 +282,23 @@ def gen_zonal_stats( | |
if 'max' in stats: | ||
feature_stats['max'] = float(masked.max()) | ||
if 'mean' in stats: | ||
feature_stats['mean'] = float(masked.mean()) | ||
if percent_cover_weighting: | ||
feature_stats['mean'] = float( | ||
np.sum(masked * rv_array) / | ||
np.sum(~masked.mask * rv_array)) | ||
else: | ||
feature_stats['mean'] = float(masked.mean()) | ||
if 'count' in stats: | ||
feature_stats['count'] = int(masked.count()) | ||
if percent_cover_weighting: | ||
feature_stats['count'] = float(np.sum(~masked.mask * rv_array)) | ||
else: | ||
feature_stats['count'] = int(masked.count()) | ||
# optional | ||
if 'sum' in stats: | ||
feature_stats['sum'] = float(masked.sum()) | ||
if percent_cover_weighting: | ||
feature_stats['sum'] = float(np.sum(masked * rv_array)) | ||
else: | ||
feature_stats['sum'] = float(masked.sum()) | ||
if 'std' in stats: | ||
feature_stats['std'] = float(masked.std()) | ||
if 'median' in stats: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ | |
from __future__ import division | ||
import sys | ||
from rasterio import features | ||
from affine import Affine | ||
from numpy import min_scalar_type | ||
from shapely.geometry import box, MultiPolygon | ||
from .io import window_bounds | ||
|
||
|
@@ -25,12 +27,13 @@ def get_percentile(stat): | |
return q | ||
|
||
|
||
def rasterize_geom(geom, like, all_touched=False): | ||
def rasterize_geom(geom, shape, affine, all_touched=False): | ||
sgoodm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Parameters | ||
---------- | ||
geom: GeoJSON geometry | ||
like: raster object with desired shape and transform | ||
shape: desired shape | ||
affine: desired transform | ||
all_touched: rasterization strategy | ||
|
||
Returns | ||
|
@@ -40,15 +43,55 @@ def rasterize_geom(geom, like, all_touched=False): | |
geoms = [(geom, 1)] | ||
rv_array = features.rasterize( | ||
geoms, | ||
out_shape=like.shape, | ||
transform=like.affine, | ||
out_shape=shape, | ||
transform=affine, | ||
fill=0, | ||
dtype='uint8', | ||
all_touched=all_touched) | ||
|
||
return rv_array.astype(bool) | ||
|
||
|
||
# https://stackoverflow.com/questions/8090229/ | ||
# resize-with-averaging-or-rebin-a-numpy-2d-array/8090605#8090605 | ||
def rebin_sum(a, shape, dtype):
    """
    Aggregate a 2D array to a coarser shape by summing blocks of cells.

    Parameters
    ----------
    a: 2D ndarray to aggregate
    shape: (rows, cols) of the output; each value must be positive and
        evenly divide the matching dimension of `a`
    dtype: dtype used to accumulate (and return) the block sums

    Returns
    -------
    ndarray of the requested shape where each cell is the sum of the
    corresponding block of `a`

    Raises
    ------
    ValueError: if `shape` does not evenly divide the shape of `a`
    """
    rows, cols = shape[0], shape[1]

    # reshape below only works for exact integer multiples; fail early with
    # a message that names the offending shapes instead of an opaque
    # reshape / division error
    if rows <= 0 or cols <= 0 or a.shape[0] % rows or a.shape[1] % cols:
        raise ValueError(
            'Cannot rebin array of shape {0} to {1}: each target dimension '
            'must be positive and evenly divide the input'.format(
                a.shape, (rows, cols)))

    # view the array as (rows, row_block, cols, col_block) and collapse the
    # two block axes by summation
    sh = rows, a.shape[0] // rows, cols, a.shape[1] // cols
    return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't dug into this code but why choose this implementation over other methods of resampling? Specifically, using Rasterio's resampling techniques would give us more control over the resampling methods versus assuming "rebin" implies categorizing pixel values, I think "upsample" or similar would be a more accurate function name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think I looked into Rasterio's resampling methods, but I tested out a couple of different implementations (one was a proof of concept you put together for Rasterio, rasterio/rasterio#232, another was a more generalized aggregation scheme I pulled from another project of mine which had way too much overhead for what was needed here) and this method was a fair bit faster with less code. My main concern with any method here is going to be minimizing the additional time/memory required to run when using this feature. Did you have a use case in mind that would require using a method other than sum? I am on board with renaming to something more accurate, I had just kept it similar to the original function from SO I used. |
||
|
||
|
||
def rasterize_pctcover_geom(geom, shape, affine, scale=None):
    """
    Estimate the fraction of each raster cell covered by a geometry.

    The geometry is rasterized (with all_touched enabled) on a grid that is
    `scale` times finer than the target raster in each direction, the fine
    cells are summed back to the target shape and the sums are divided by
    scale**2, giving a value between 0 and 1 for every target cell.

    Parameters
    ----------
    geom: GeoJSON geometry
    shape: desired shape
    affine: desired transform
    scale: scale at which to generate percent cover estimate
        (number of fine cells per raster cell along each axis;
        defaults to 10 when None)

    Returns
    -------
    ndarray: float32
    """
    if scale is None:
        scale = 10

    # smallest dtype able to hold the largest possible block sum (scale**2)
    min_dtype = min_scalar_type(scale**2)

    # Build the fine-resolution transform by scaling every linear term of
    # the input transform and keeping its origin. Using only affine[0]
    # (and assuming a y step of -affine[0]) breaks for rasters with
    # non-square pixels or rotation/shear terms.
    new_affine = Affine(affine[0] / scale, affine[1] / scale, affine[2],
                        affine[3] / scale, affine[4] / scale, affine[5])

    new_shape = (shape[0] * scale, shape[1] * scale)

    # fine-resolution boolean mask, then count covered sub-cells per cell
    rv_array = rasterize_geom(geom, new_shape, new_affine, True)
    rv_array = rebin_sum(rv_array, shape, min_dtype)

    return rv_array.astype('float32') / (scale**2)
|
||
|
||
def stats_to_csv(stats): | ||
if sys.version_info[0] >= 3: | ||
from io import StringIO as IO # pragma: no cover | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain why we need to limit to integers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `reshape` performed in the `rebin_sum` function requires ints: https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html