Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Polygon support #819

Closed
wants to merge 6 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Added Polygon support
 - New PolygonsArray/LinesArray/PointsArray extension arrays
 - New Canvas.polygons() method to rasterize polygons
 - Updates to Canvas.points and Canvas.lines to accept new geometry arrays.
jonmmease committed Oct 23, 2019
commit b38e7cf21b033602973c2cd0d8ae21e70c88e16c
2 changes: 1 addition & 1 deletion datashader/__init__.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@
from .pipeline import Pipeline # noqa (API import)
from . import transfer_functions as tf # noqa (API import)
from . import data_libraries # noqa (API import)

from . import geom # noqa (API import)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The module name geom could be confusing here, as we have both spatial and geo already. The contents of geo.py should probably be moved into the other two, so let's ignore that one. spatial is full of functions for working with aggregate arrays, whereas geom is full of functions for doing the aggregations. geom vs. spatial doesn't convey that distinction, to me. I can't immediately think of a better name, though.


# Make RaggedArray pandas extension array available for
# pandas >= 0.24.0 is installed
177 changes: 134 additions & 43 deletions datashader/core.py
Original file line number Diff line number Diff line change
@@ -133,6 +133,18 @@ def validate(self, range):
_axis_lookup = {'linear': LinearAxis(), 'log': LogAxis()}


def validate_xy_or_geometry(glyph, x, y, geometry):
if (geometry is None and (x is None or y is None) or
geometry is not None and (x is not None or y is not None)):
raise ValueError("""
{glyph} coordinates may be specified by providing both the x and y arguments, or by
providing the geometry argument. Received:
x: {x}
y: {y}
geometry: {geometry}
""".format(glyph=glyph, x=repr(x), y=repr(y), geometry=repr(geometry)))


class Canvas(object):
"""An abstract canvas representing the space in which to bin.

@@ -157,34 +169,46 @@ def __init__(self, plot_width=600, plot_height=600,
self.x_axis = _axis_lookup[x_axis_type]
self.y_axis = _axis_lookup[y_axis_type]

def points(self, source, x, y, agg=None):
def points(self, source, x=None, y=None, agg=None, geometry=None):
"""Compute a reduction by pixel, mapping data to pixels as points.

Parameters
----------
source : pandas.DataFrame, dask.DataFrame, or xarray.DataArray/Dataset
The input datasource.
x, y : str
Column names for the x and y coordinates of each point.
Column names for the x and y coordinates of each point. If provided,
the geometry argument may not also be provided.
agg : Reduction, optional
Reduction to compute. Default is ``count()``.
geometry: str
Column name of a PointsArray of the coordinates of each point. If provided,
the x and y arguments may not also be provided.
"""
from .glyphs import Point
from .glyphs import Point, PointGeom
from .reductions import count as count_rdn

validate_xy_or_geometry('Point', x, y, geometry)

if agg is None:
agg = count_rdn()

if (isinstance(source, SpatialPointsFrame) and
source.spatial is not None and
source.spatial.x == x and source.spatial.y == y and
self.x_range is not None and self.y_range is not None):
# Handle down-selecting of SpatialPointsFrame
if geometry is None:
if (isinstance(source, SpatialPointsFrame) and
source.spatial is not None and
source.spatial.x == x and source.spatial.y == y and
self.x_range is not None and self.y_range is not None):

source = source.spatial_query(
x_range=self.x_range, y_range=self.y_range)
source = source.spatial_query(
x_range=self.x_range, y_range=self.y_range)
glyph = Point(x, y)
else:
glyph = PointGeom(geometry)

return bypixel(source, self, Point(x, y), agg)
return bypixel(source, self, glyph, agg)

def line(self, source, x, y, agg=None, axis=0):
def line(self, source, x=None, y=None, agg=None, axis=0, geometry=None):
"""Compute a reduction by pixel, mapping data to pixels as one or
more lines.

@@ -215,6 +239,9 @@ def line(self, source, x, y, agg=None, axis=0):
all rows in source
* 1: Draw one line per row in source using data from the
specified columns
geometry : str
Column name of a LinesArray of the coordinates of each line. If provided,
the x and y arguments may not also be provided.

Examples
--------
@@ -284,55 +311,61 @@ def line(self, source, x, y, agg=None, axis=0):
"""
from .glyphs import (LineAxis0, LinesAxis1, LinesAxis1XConstant,
LinesAxis1YConstant, LineAxis0Multi,
LinesAxis1Ragged)
LinesAxis1Ragged, LineAxis1Geom)
from .reductions import any as any_rdn

validate_xy_or_geometry('Line', x, y, geometry)

if agg is None:
agg = any_rdn()

# Broadcast column specifications to handle cases where
# x is a list and y is a string or vice versa
orig_x, orig_y = x, y
x, y = _broadcast_column_specifications(x, y)
if geometry is not None:
glyph = LineAxis1Geom(geometry)
else:
# Broadcast column specifications to handle cases where
# x is a list and y is a string or vice versa
orig_x, orig_y = x, y
x, y = _broadcast_column_specifications(x, y)

if axis == 0:
if (isinstance(x, (Number, string_types)) and
isinstance(y, (Number, string_types))):
glyph = LineAxis0(x, y)
elif (isinstance(x, (list, tuple)) and
isinstance(y, (list, tuple))):
glyph = LineAxis0Multi(tuple(x), tuple(y))
else:
raise ValueError("""
if axis == 0:
if (isinstance(x, (Number, string_types)) and
isinstance(y, (Number, string_types))):
glyph = LineAxis0(x, y)
elif (isinstance(x, (list, tuple)) and
isinstance(y, (list, tuple))):
glyph = LineAxis0Multi(tuple(x), tuple(y))
else:
raise ValueError("""
Invalid combination of x and y arguments to Canvas.line when axis=0.
Received:
x: {x}
y: {y}
See docstring for more information on valid usage""".format(
x=repr(orig_x), y=repr(orig_y)))
x=repr(orig_x), y=repr(orig_y)))

elif axis == 1:
if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
glyph = LinesAxis1(tuple(x), tuple(y))
elif (isinstance(x, np.ndarray) and
isinstance(y, (list, tuple))):
glyph = LinesAxis1XConstant(x, tuple(y))
elif (isinstance(x, (list, tuple)) and
isinstance(y, np.ndarray)):
glyph = LinesAxis1YConstant(tuple(x), y)
elif (isinstance(x, (Number, string_types)) and
isinstance(y, (Number, string_types))):
glyph = LinesAxis1Ragged(x, y)
else:
raise ValueError("""
elif axis == 1:
if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
glyph = LinesAxis1(tuple(x), tuple(y))
elif (isinstance(x, np.ndarray) and
isinstance(y, (list, tuple))):
glyph = LinesAxis1XConstant(x, tuple(y))
elif (isinstance(x, (list, tuple)) and
isinstance(y, np.ndarray)):
glyph = LinesAxis1YConstant(tuple(x), y)
elif (isinstance(x, (Number, string_types)) and
isinstance(y, (Number, string_types))):
glyph = LinesAxis1Ragged(x, y)
else:
raise ValueError("""
Invalid combination of x and y arguments to Canvas.line when axis=1.
Received:
x: {x}
y: {y}
See docstring for more information on valid usage""".format(
x=repr(orig_x), y=repr(orig_y)))
x=repr(orig_x), y=repr(orig_y)))

else:
raise ValueError("""
else:
raise ValueError("""
The axis argument to Canvas.line must be 0 or 1
Received: {axis}""".format(axis=axis))

@@ -575,6 +608,64 @@ def area(self, source, x, y, agg=None, axis=0, y_stack=None):

return bypixel(source, self, glyph, agg)

def polygons(self, source, geometry, agg=None):
"""Compute a reduction by pixel, mapping data to pixels as one or
more polygons.

Parameters
----------
source : xarray.DataArray or Dataset
The input datasource.
geometry : str
Column name of a PolygonsArray of the coordinates of each line.
agg : Reduction, optional
Reduction to compute. Default is ``any()``.

Returns
-------
data : xarray.DataArray

Examples
--------
>>> from math import inf # doctest: +SKIP
... import datashader as ds
... import datashader.transfer_functions as tf
... from datashader.geom import PolygonsArray
... import pandas as pd
...
... polygons = PolygonsArray([
... # ## First Element
... # Filled quadrilateral (CCW order)
... [0, 0, 1, 0, 2, 2, -1, 4, 0, 0,
... # Triangular hole (CW order)
... -inf, -inf, 0.5, 1, 1, 2, 1.5, 1.5, 0.5, 1,
... # Rectangular hole (CW order)
... -inf, -inf, 0, 2, 0, 2.5, 0.5, 2.5, 0.5, 2, 0, 2,
... # Filled triangle
... inf, inf, 2.5, 3, 3.5, 3, 3.5, 4, 2.5, 3,
... ],
...
... # ## Second Element
... # Filled rectangle (CCW order)
... [3, 0, 3, 2, 4, 2, 4, 0, 3, 0,
... # Rectangular hole (CW order)
... -inf, -inf, 3.25, 0.25, 3.75, 0.25, 3.75, 1.75, 3.25, 1.75, 3.25, 0.25,
... ]
... ])
...
... df = pd.DataFrame({'polygons': polygons, 'v': range(len(polygons))})
...
... cvs = ds.Canvas()
... agg = cvs.polygons(df, geometry='polygons', agg=ds.sum('v'))
... tf.shade(agg)
"""
from .glyphs import PolygonGeom
from .reductions import any as any_rdn
if agg is None:
agg = any_rdn()
glyph = PolygonGeom(geometry)
return bypixel(source, self, glyph, agg)

def quadmesh(self, source, x=None, y=None, agg=None):
"""Samples a recti- or curvi-linear quadmesh by canvas size and bounds.
Parameters
3 changes: 2 additions & 1 deletion datashader/data_libraries/pandas.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@

from datashader.core import bypixel
from datashader.compiler import compile_components
from datashader.glyphs.points import _PointLike
from datashader.glyphs.points import _PointLike, _GeomLike
from datashader.glyphs.area import _AreaToLineLike
from datashader.utils import Dispatcher
from collections import OrderedDict
@@ -21,6 +21,7 @@ def pandas_pipeline(df, schema, canvas, glyph, summary):


@glyph_dispatch.register(_PointLike)
@glyph_dispatch.register(_GeomLike)
@glyph_dispatch.register(_AreaToLineLike)
def default(glyph, source, schema, canvas, summary, cuda=False):
create, info, append, _, finalize = compile_components(summary, schema, glyph, cuda)
104 changes: 59 additions & 45 deletions datashader/datatypes.py
Original file line number Diff line number Diff line change
@@ -74,25 +74,26 @@ def _validate_ragged_properties(start_indices, flat_array):
m=len(flat_array), vals=repr(some_invalid_vals)))


def _ragged_or_nan(a):
if np.isscalar(a) and np.isnan(a):
return a
else:
return _RaggedElement(a)


def _array_or_nan(a):
if np.isscalar(a) and np.isnan(a) or isinstance(a, np.ndarray):
return a
elif isinstance(a, _RaggedElement):
return a.array
else:
return a


# Internal ragged element array wrapper that provides
# equality, ordering, and hashing.
@total_ordering
class _RaggedElement(object):

@staticmethod
def ragged_or_nan(a):
if np.isscalar(a) and np.isnan(a):
return a
else:
return _RaggedElement(a)

@staticmethod
def array_or_nan(a):
if np.isscalar(a) and np.isnan(a):
return a
else:
return a.array

def __init__(self, array):
self.array = array

@@ -124,12 +125,13 @@ class RaggedDtype(ExtensionDtype):
"""
type = np.ndarray
base = np.dtype('O')
_subtype_re = re.compile(r"^ragged\[(?P<subtype>\w+)\]$")
_metadata = ('_dtype',)
_type_name = 'Ragged'
_subtype_re = re.compile(r"^ragged\[(?P<subtype>\w+)\]$")

@property
def name(self):
return 'Ragged[{subtype}]'.format(subtype=self.subtype)
return '{name}[{subtype}]'.format(name=self._type_name, subtype=self.subtype)

def __repr__(self):
return self.name
@@ -143,12 +145,12 @@ def construct_from_string(cls, string):
# lowercase string
string = string.lower()

msg = "Cannot construct a 'RaggedDtype' from '{}'"
if string.startswith('ragged'):
msg = "Cannot construct a '%s' from '{}'" % cls.__name__
if string.startswith(cls._type_name.lower()):
# Extract subtype
try:
subtype_string = cls._parse_subtype(string)
return RaggedDtype(dtype=subtype_string)
return cls(dtype=subtype_string)
except Exception:
raise TypeError(msg.format(string))
else:
@@ -189,7 +191,7 @@ def _parse_subtype(cls, dtype_string):
match = cls._subtype_re.match(dtype_string)
if match:
subtype_string = match.groupdict()['subtype']
elif dtype_string == 'ragged':
elif dtype_string == cls._type_name.lower():
subtype_string = 'float64'
else:
raise ValueError("Cannot parse {dtype_string}".format(
@@ -208,6 +210,8 @@ class RaggedArray(ExtensionArray):
Methods not otherwise documented here are inherited from ExtensionArray;
please see the corresponding method on that class for the docstring
"""
_element_type = None

def __init__(self, data, dtype=None, copy=False):
"""
Construct a RaggedArray
@@ -264,6 +268,9 @@ def __init__(self, data, dtype=None, copy=False):
self._start_indices = self._start_indices.copy()
self._flat_array = self._flat_array.copy()
else:
# Unwrap RaggedElements to numpy arrays
data = [_array_or_nan(el) for el in data]

# Compute lengths
index_len = len(data)
buffer_len = sum(len(datum)
@@ -307,15 +314,20 @@ def __init__(self, data, dtype=None, copy=False):
# increment next start index
next_start_ind += n

self._dtype = RaggedDtype(dtype=dtype)
self._dtype = self._dtype_class(dtype=dtype)

@property
def _dtype_class(self):
return RaggedDtype

def __eq__(self, other):
if isinstance(other, RaggedArray):
if isinstance(other, type(self)):
if len(other) != len(self):
raise ValueError("""
Cannot check equality of RaggedArray values of unequal length
Cannot check equality of {typ} values of unequal length
len(ra1) == {len_ra1}
len(ra2) == {len_ra2}""".format(
typ=self.__class__.__name__,
len_ra1=len(self),
len_ra2=len(other)))

@@ -350,8 +362,8 @@ def __eq__(self, other):
self.start_indices, self.flat_array, other_array)
else:
raise ValueError("""
Cannot check equality of RaggedArray of length {ra_len} with:
{other}""".format(ra_len=len(self), other=repr(other)))
Cannot check equality of {typ} of length {ra_len} with:
{other}""".format(typ=type(self).__name__, ra_len=len(self), other=repr(other)))

return result

@@ -399,45 +411,47 @@ def __getitem__(self, item):
if item + 1 <= len(self) - 1
else len(self.flat_array))

return (self.flat_array[slice_start:slice_end]
if slice_end!=slice_start
else np.nan)
if slice_end == slice_start:
return np.nan
elif self._element_type is None:
return self.flat_array[slice_start:slice_end]
else:
return self._element_type(self.flat_array[slice_start:slice_end])

elif type(item) == slice:
data = []
selected_indices = np.arange(len(self))[item]

for selected_index in selected_indices:
data.append(self[selected_index])
data.append(_array_or_nan(self[selected_index]))

return RaggedArray(data, dtype=self.flat_array.dtype)
return self.__class__(data, dtype=self.flat_array.dtype)

elif isinstance(item, np.ndarray) and item.dtype == 'bool':
data = []

for i, m in enumerate(item):
if m:
data.append(self[i])
data.append(_array_or_nan(self[i]))

return RaggedArray(data, dtype=self.flat_array.dtype)
return self.__class__(data, dtype=self.flat_array.dtype)
elif isinstance(item, (list, np.ndarray)):
return self.take(item, allow_fill=False)
else:
raise IndexError(item)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
return RaggedArray(scalars, dtype=dtype)
return cls(scalars, dtype=dtype)

@classmethod
def _from_factorized(cls, values, original):
return RaggedArray(
[_RaggedElement.array_or_nan(v) for v in values],
return cls(
[_array_or_nan(v) for v in values],
dtype=original.flat_array.dtype)

def _as_ragged_element_array(self):
return np.array([_RaggedElement.ragged_or_nan(self[i])
for i in range(len(self))])
return np.array([_ragged_or_nan(self[i]) for i in range(len(self))])

def _values_for_factorize(self):
return self._as_ragged_element_array(), np.nan
@@ -450,7 +464,7 @@ def unique(self):

uniques = unique(self._as_ragged_element_array())
return self._from_sequence(
[_RaggedElement.array_or_nan(v) for v in uniques],
[_array_or_nan(v) for v in uniques],
dtype=self.dtype)

def fillna(self, value=None, method=None, limit=None):
@@ -462,7 +476,7 @@ def fillna(self, value=None, method=None, limit=None):

mask = self.isna()

if isinstance(value, RaggedArray):
if isinstance(value, type(self)):
if len(value) != len(self):
raise ValueError("Length of 'value' does not match. Got ({}) "
" expected {}".format(len(value), len(self)))
@@ -511,7 +525,7 @@ def shift(self, periods=1, fill_value=None):

def searchsorted(self, value, side="left", sorter=None):
arr = self._as_ragged_element_array()
if isinstance(value, RaggedArray):
if isinstance(value, type(self)):
search_value = value._as_ragged_element_array()
else:
search_value = _RaggedElement(value)
@@ -537,16 +551,16 @@ def take(self, indices, allow_fill=False, fill_value=None):
if len(self) == 0 and len(indices) > 0:
raise IndexError("cannot do a non-empty take")

sequence = [self[i] for i in indices]
sequence = [_array_or_nan(self[i]) for i in indices]

return RaggedArray(sequence, dtype=self.flat_array.dtype)
return self.__class__(sequence, dtype=self.flat_array.dtype)

def copy(self, deep=False):
data = dict(
flat_array=self.flat_array,
start_indices=self.start_indices)

return RaggedArray(data, copy=deep)
return self.__class__(data, copy=deep)

@classmethod
def _concat_same_type(cls, to_concat):
@@ -561,7 +575,7 @@ def _concat_same_type(cls, to_concat):
start_indices = np.hstack([ra.start_indices + offset
for offset, ra in zip(offsets, to_concat)])

return RaggedArray(dict(
return cls(dict(
flat_array=flat_array, start_indices=start_indices),
copy=False)

4 changes: 4 additions & 0 deletions datashader/geom/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .lines import Lines, LinesArray, LinesDtype # noqa (API import)
from .points import Points, PointsArray, PointsDtype # noqa (API import)
from .polygons import Polygons, PolygonsArray, PolygonsDtype # noqa (API import)
from .base import Geom, GeomArray, GeomDtype # noqa (API import)
183 changes: 183 additions & 0 deletions datashader/geom/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from __future__ import absolute_import
from math import isfinite, inf
import re
from functools import total_ordering
import numpy as np
from pandas.core.dtypes.dtypes import register_extension_dtype

from datashader.datatypes import _RaggedElement, RaggedDtype, RaggedArray
from datashader.utils import ngjit


@total_ordering
class Geom(_RaggedElement):
def __repr__(self):
data = [(x, y) for x, y in zip(self.xs, self.ys)]
return "{typ}({data})".format(typ=self.__class__.__name__, data=data)

@classmethod
def _shapely_to_array_parts(cls, shape):
raise NotImplementedError()

@classmethod
def from_shapely(cls, shape):
shape_parts = cls._shapely_to_array_parts(shape)
return cls(np.concatenate(shape_parts))

@property
def xs(self):
return self.array[0::2]

@property
def ys(self):
return self.array[1::2]

@property
def bounds(self):
return bounds_interleaved(self.array)

@property
def bounds_x(self):
return bounds_interleaved_1d(self.array, 0)

@property
def bounds_y(self):
return bounds_interleaved_1d(self.array, 1)

@property
def length(self):
raise NotImplementedError()

@property
def area(self):
raise NotImplementedError()


@register_extension_dtype
class GeomDtype(RaggedDtype):
_type_name = "Geom"
_subtype_re = re.compile(r"^geom\[(?P<subtype>\w+)\]$")

@classmethod
def construct_array_type(cls):
return GeomArray


class GeomArray(RaggedArray):
_element_type = Geom

def __init__(self, *args, **kwargs):
super(GeomArray, self).__init__(*args, **kwargs)
# Validate that there are an even number of elements in each Geom element
if (any(self.start_indices % 2) or
len(self) and (len(self.flat_array) - self.start_indices[-1]) % 2 > 0):
raise ValueError("There must be an even number of elements in each row")

@property
def _dtype_class(self):
return GeomDtype

@property
def xs(self):
start_indices = self.start_indices // 2
flat_array = self.flat_array[0::2]
return RaggedArray({"start_indices": start_indices, "flat_array": flat_array})

@property
def ys(self):
start_indices = self.start_indices // 2
flat_array = self.flat_array[1::2]
return RaggedArray({"start_indices": start_indices, "flat_array": flat_array})

def to_geopandas(self):
from geopandas.array import from_shapely
return from_shapely([el.to_shapely() for el in self])

@classmethod
def from_geopandas(cls, ga):
line_parts = [
cls._element_type._shapely_to_array_parts(shape) for shape in ga
]
line_lengths = [
sum([len(part) for part in parts])
for parts in line_parts
]
flat_array = np.concatenate(
[part for parts in line_parts for part in parts]
)
start_indices = np.concatenate(
[[0], line_lengths[:-1]]
).astype('uint').cumsum()
return cls({
'start_indices': start_indices, 'flat_array': flat_array
})

@property
def bounds(self):
return bounds_interleaved(self.flat_array)

@property
def bounds_x(self):
return bounds_interleaved_1d(self.flat_array, 0)

@property
def bounds_y(self):
return bounds_interleaved_1d(self.flat_array, 1)

@property
def length(self):
raise NotImplementedError()

@property
def area(self):
raise NotImplementedError()


@ngjit
def _geom_map(start_indices, flat_array, result, fn):
n = len(start_indices)
for i in range(n):
start = start_indices[i]
stop = start_indices[i + 1] if i < n - 1 else len(flat_array)
result[i] = fn(flat_array[start:stop])


@ngjit
def bounds_interleaved(values):
"""
compute bounds
"""
xmin = inf
ymin = inf
xmax = -inf
ymax = -inf

for i in range(0, len(values), 2):
x = values[i]
if isfinite(x):
xmin = min(xmin, x)
xmax = max(xmax, x)

y = values[i + 1]
if isfinite(y):
ymin = min(ymin, y)
ymax = max(ymax, y)

return (xmin, ymin, xmax, ymax)


@ngjit
def bounds_interleaved_1d(values, offset):
"""
compute bounds
"""
vmin = inf
vmax = -inf

for i in range(0, len(values), 2):
v = values[i + offset]
if isfinite(v):
vmin = min(vmin, v)
vmax = max(vmax, v)

return (vmin, vmax)
117 changes: 117 additions & 0 deletions datashader/geom/lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from __future__ import absolute_import, division
import re
from math import sqrt, isfinite
from functools import total_ordering
import numpy as np

from pandas.core.dtypes.dtypes import register_extension_dtype

from datashader.geom.base import Geom, GeomDtype, GeomArray, _geom_map
from datashader.utils import ngjit


try:
# See if we can register extension type with dask >= 1.1.0
from dask.dataframe.extensions import make_array_nonempty
except ImportError:
make_array_nonempty = None


@total_ordering
class Lines(Geom):
@classmethod
def _shapely_to_array_parts(cls, shape):
import shapely.geometry as sg
if isinstance(shape, (sg.LineString, sg.LinearRing)):
# Single line
return [np.asarray(shape.ctypes)]
elif isinstance(shape, sg.MultiLineString):
shape = list(shape)
line_parts = [np.asarray(shape[0].ctypes)]
line_separator = np.array([np.inf, np.inf])
for line in shape[1:]:
line_parts.append(line_separator)
line_parts.append(np.asarray(line.ctypes))
return line_parts
else:
raise ValueError("""
Received invalid value of type {typ}. Must be an instance of LineString,
MultiLineString, or LinearRing""".format(typ=type(shape).__name__))

def to_shapely(self):
import shapely.geometry as sg
line_breaks = np.concatenate(
[[-2], np.nonzero(~np.isfinite(self.array))[0][0::2], [len(self.array)]]
)
line_arrays = [self.array[start + 2:stop]
for start, stop in zip(line_breaks[:-1], line_breaks[1:])]

lines = [sg.LineString(line_array.reshape(len(line_array) // 2, 2))
for line_array in line_arrays]

if len(lines) == 1:
return lines[0]
else:
return sg.MultiLineString(lines)

@property
def length(self):
return compute_length(self.array)

@property
def area(self):
return 0.0


@register_extension_dtype
class LinesDtype(GeomDtype):
_type_name = "Lines"
_subtype_re = re.compile(r"^lines\[(?P<subtype>\w+)\]$")

@classmethod
def construct_array_type(cls):
return LinesArray


class LinesArray(GeomArray):
_element_type = Lines

@property
def _dtype_class(self):
return LinesDtype

@property
def length(self):
result = np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)
_geom_map(self.start_indices, self.flat_array, result, compute_length)
return result

@property
def area(self):
return np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)


@ngjit
def compute_length(values):
total_len = 0.0
x0 = values[0]
y0 = values[1]
for i in range(2, len(values), 2):
x1 = values[i]
y1 = values[i+1]

if isfinite(x0) and isfinite(y0) and isfinite(x1) and isfinite(y1):
total_len += sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)

x0 = x1
y0 = y1

return total_len


def lines_array_non_empty(dtype):
return LinesArray([[1, 0, 1, 1], [1, 2, 0, 0]], dtype=dtype)


if make_array_nonempty:
make_array_nonempty.register(LinesDtype)(lines_array_non_empty)
76 changes: 76 additions & 0 deletions datashader/geom/points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import re
from functools import total_ordering
import numpy as np
from pandas.core.dtypes.dtypes import register_extension_dtype

from datashader.geom.base import Geom, GeomDtype, GeomArray


try:
# See if we can register extension type with dask >= 1.1.0
from dask.dataframe.extensions import make_array_nonempty
except ImportError:
make_array_nonempty = None


@total_ordering
class Points(Geom):
@classmethod
def _shapely_to_array_parts(cls, shape):
import shapely.geometry as sg
if isinstance(shape, (sg.Point, sg.MultiPoint)):
# Single line
return [np.asarray(shape.ctypes)]
else:
raise ValueError("""
Received invalid value of type {typ}. Must be an instance of Point,
or MultiPoint""".format(typ=type(shape).__name__))

def to_shapely(self):
import shapely.geometry as sg
if len(self.array) == 2:
return sg.Point(self.array)
else:
return sg.MultiPoint(self.array.reshape(len(self.array) // 2, 2))

@property
def length(self):
return 0.0

@property
def area(self):
return 0.0


@register_extension_dtype
class PointsDtype(GeomDtype):
_type_name = "Points"
_subtype_re = re.compile(r"^points\[(?P<subtype>\w+)\]$")

@classmethod
def construct_array_type(cls):
return PointsArray


class PointsArray(GeomArray):
_element_type = Points

@property
def _dtype_class(self):
return PointsDtype

@property
def length(self):
return np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)

@property
def area(self):
return np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)


def points_array_non_empty(dtype):
return PointsArray([[1, 0, 0, 0], [1, 2, 0, 0]], dtype=dtype)


if make_array_nonempty:
make_array_nonempty.register(PointsDtype)(points_array_non_empty)
177 changes: 177 additions & 0 deletions datashader/geom/polygons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
from __future__ import absolute_import
import re
from math import isfinite
from functools import total_ordering

import numpy as np
from pandas.core.dtypes.dtypes import register_extension_dtype

from datashader.geom.base import Geom, GeomDtype, GeomArray, _geom_map
from datashader.geom.lines import compute_length
from datashader.utils import ngjit


try:
# See if we can register extension type with dask >= 1.1.0
from dask.dataframe.extensions import make_array_nonempty
except ImportError:
make_array_nonempty = None


@total_ordering
class Polygons(Geom):
@staticmethod
def _polygon_to_array_parts(polygon):
import shapely.geometry as sg
shape = sg.polygon.orient(polygon)
exterior = np.asarray(shape.exterior.ctypes)
polygon_parts = [exterior]
hole_separator = np.array([-np.inf, -np.inf])
for ring in shape.interiors:
interior = np.asarray(ring.ctypes)
polygon_parts.append(hole_separator)
polygon_parts.append(interior)
return polygon_parts

@classmethod
def _shapely_to_array_parts(cls, shape):
import shapely.geometry as sg
if isinstance(shape, sg.Polygon):
# Single polygon
return Polygons._polygon_to_array_parts(shape)
elif isinstance(shape, sg.MultiPolygon):
shape = list(shape)
polygon_parts = Polygons._polygon_to_array_parts(shape[0])
polygon_separator = np.array([np.inf, np.inf])
for polygon in shape[1:]:
polygon_parts.append(polygon_separator)
polygon_parts.extend(Polygons._polygon_to_array_parts(polygon))
return polygon_parts
else:
raise ValueError("""
Received invalid value of type {typ}. Must be an instance of
shapely.geometry.Polygon or shapely.geometry.MultiPolygon"""
.format(typ=type(shape).__name__))

def to_shapely(self):
import shapely.geometry as sg
ring_breaks = np.concatenate(
[[-2], np.nonzero(~np.isfinite(self.array))[0][0::2], [len(self.array)]]
)
polygon_breaks = set(np.concatenate(
[[-2], np.nonzero(np.isposinf(self.array))[0][0::2], [len(self.array)]]
))

# Build rings for both outer and holds
rings = []
for start, stop in zip(ring_breaks[:-1], ring_breaks[1:]):
ring_array = self.array[start + 2: stop]
ring_pairs = ring_array.reshape(len(ring_array) // 2, 2)
rings.append(sg.LinearRing(ring_pairs))

# Build polygons
polygons = []
outer = None
holes = []
for ring, start in zip(rings, ring_breaks[:-1]):
if start in polygon_breaks:
if outer:
# This is the first ring in a new polygon, construct shapely polygon
# with already collected rings
polygons.append(sg.Polygon(outer, holes))

# Start collecting new polygon
outer = ring
holes = []
else:
# Ring is a hole
holes.append(ring)

# Build final polygon
polygons.append(sg.Polygon(outer, holes))

if len(polygons) == 1:
return polygons[0]
else:
return sg.MultiPolygon(polygons)

@property
def length(self):
return compute_length(self.array)

@property
def area(self):
return compute_area(self.array)


@register_extension_dtype
class PolygonsDtype(GeomDtype):
_type_name = "Polygons"
_subtype_re = re.compile(r"^polygons\[(?P<subtype>\w+)\]$")

@classmethod
def construct_array_type(cls):
return PolygonsArray


class PolygonsArray(GeomArray):
_element_type = Polygons

@property
def _dtype_class(self):
return PolygonsDtype

@property
def length(self):
result = np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)
_geom_map(self.start_indices, self.flat_array, result, compute_length)
return result

@property
def area(self):
result = np.zeros(self.start_indices.shape, dtype=self.flat_array.dtype)
_geom_map(self.start_indices, self.flat_array, result, compute_area)
return result


@ngjit
def compute_area(values):
area = 0.0
if len(values) < 6:
# A degenerate polygon
return 0.0
polygon_start = 0
for k in range(0, len(values) - 4, 2):
i, j = k + 2, k + 4
ix = values[i]
jy = values[j + 1]
ky = values[k + 1]
if not isfinite(values[j]):
# last vertex not finite, polygon traversal finished, add wraparound term
polygon_stop = j
firstx = values[polygon_start]
secondy = values[polygon_start + 3]
lasty = values[polygon_stop - 3]
area += firstx * (secondy - lasty)
elif not isfinite(values[i]):
# middle vertex not finite, but last vertex is.
# We're going to start a new polygon
polygon_start = j
elif isfinite(ix) and isfinite(jy) and isfinite(ky):
area += ix * (jy - ky)

# wrap-around term for final polygon
firstx = values[polygon_start]
secondy = values[polygon_start + 3]
lasty = values[len(values) - 3]
area += firstx * (secondy - lasty)

return area / 2.0


def polygons_array_non_empty(dtype):
return PolygonsArray([[1, 0, 0, 0, 2, 2], [1, 2, 0, 0, 2, 2]], dtype=dtype)


if make_array_nonempty:
make_array_nonempty.register(PolygonsDtype)(polygons_array_non_empty)
4 changes: 3 additions & 1 deletion datashader/glyphs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import absolute_import
from .points import Point # noqa (API import)
from .points import Point, PointGeom # noqa (API import)
from .line import ( # noqa (API import)
LineAxis0,
LineAxis0Multi,
LinesAxis1,
LinesAxis1XConstant,
LinesAxis1YConstant,
LinesAxis1Ragged,
LineAxis1Geom,
)
from .area import ( # noqa (API import)
AreaToZeroAxis0,
@@ -23,6 +24,7 @@
AreaToLineAxis1Ragged,
)
from .trimesh import Triangles # noqa (API import)
from .polygon import PolygonGeom # noqa (API import)
from .quadmesh import ( # noqa (API import)
QuadMeshRectilinear, QuadMeshCurvialinear
)
94 changes: 93 additions & 1 deletion datashader/glyphs/line.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from __future__ import absolute_import, division
from math import isfinite
import numpy as np
from toolz import memoize

from datashader.glyphs.points import _PointLike, _GeomLike
from datashader.glyphs.glyph import isnull
from datashader.glyphs.points import _PointLike
from datashader.utils import isreal, ngjit
from numba import cuda

@@ -458,6 +459,35 @@ def extend(aggs, df, vt, bounds, plot_start=True):
return extend


class LineAxis1Geom(_GeomLike):
@memoize
def _build_extend(self, x_mapper, y_mapper, info, append):
expand_aggs_and_cols = self.expand_aggs_and_cols(append)
map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
draw_segment = _build_draw_segment(
append, map_onto_pixel, expand_aggs_and_cols
)

perform_extend_cpu = _build_extend_line_axis1_geom(
draw_segment, expand_aggs_and_cols
)
geom_name = self.geometry

def extend(aggs, df, vt, bounds, plot_start=True):
sx, tx, sy, ty = vt
xmin, xmax, ymin, ymax = bounds
aggs_and_cols = aggs + info(df)
geom_array = df[geom_name].array
# line may be clipped, then mapped to pixels
perform_extend_cpu(
sx, tx, sy, ty,
xmin, xmax, ymin, ymax,
geom_array, *aggs_and_cols
)

return extend


def _build_map_onto_pixel_for_line(x_mapper, y_mapper):
@ngjit
def map_onto_pixel(sx, tx, sy, ty, xmin, xmax, ymin, ymax, x, y):
@@ -909,3 +939,65 @@ def extend_cpu_numba(
segment_start, x0, x1, y0, y1, *aggs_and_cols)

return extend_cpu


def _build_extend_line_axis1_geom(
draw_segment, expand_aggs_and_cols
):
def extend_cpu(
sx, tx, sy, ty,
xmin, xmax, ymin, ymax,
geom_array, *aggs_and_cols
):
start_i = geom_array.start_indices
flat = geom_array.flat_array

extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
start_i, flat, *aggs_and_cols
)

@ngjit
@expand_aggs_and_cols
def extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
start_i, flat, *aggs_and_cols
):
nrows = len(start_i)
flat_len = len(flat)

for i in range(nrows):
# Get x index range
start_index = int(start_i[i])
stop_index = int(start_i[i + 1]
if i < nrows - 1
else flat_len)

for j in range(start_index, stop_index - 2, 2):

x0 = flat[j]
if not isfinite(x0):
continue

y0 = flat[j + 1]
if not isfinite(y0):
continue

x1 = flat[j + 2]
if not isfinite(x1):
continue

y1 = flat[j + 3]
if not isfinite(y1):
continue

segment_start = (
(j == start_index) or
not isfinite(flat[j - 2]) or
not isfinite(flat[j - 1])
)

draw_segment(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
segment_start, x0, x1, y0, y1, *aggs_and_cols)

return extend_cpu
116 changes: 116 additions & 0 deletions datashader/glyphs/points.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,61 @@ def values(s):
return s.values


class _GeomLike(Glyph):
def __init__(self, geometry):
self.geometry = geometry

@property
def ndims(self):
return 1

@property
def inputs(self):
return (self.geometry,)

@property
def geom_dtype(self):
from datashader.geom import GeomDtype
return GeomDtype

def validate(self, in_dshape):
if not isinstance(in_dshape[str(self.geometry)], self.geom_dtype):
raise ValueError('{col} must be a {typ} array'.format(
col=self.geometry, typ=self.geom_dtype._type_name
))

@property
def x_label(self):
return 'x'

@property
def y_label(self):
return 'y'

def required_columns(self):
return [self.geometry]

def compute_x_bounds(self, df):
bounds = df[self.geometry].array.bounds_x
return self.maybe_expand_bounds(bounds)

def compute_y_bounds(self, df):
bounds = df[self.geometry].array.bounds_y
return self.maybe_expand_bounds(bounds)

@memoize
def compute_bounds_dask(self, ddf):
r = ddf.map_partitions(lambda df: np.array(
[list(df[self.geometry].array.bounds)]
)).compute()

x_extents = np.nanmin(r[:, 0]), np.nanmax(r[:, 2])
y_extents = np.nanmin(r[:, 1]), np.nanmax(r[:, 3])

return (self.maybe_expand_bounds(x_extents),
self.maybe_expand_bounds(y_extents))


class _PointLike(Glyph):
"""Shared methods between Point and Line"""
def __init__(self, x, y):
@@ -142,3 +197,64 @@ def extend(aggs, df, vt, bounds):
)

return extend


class PointGeom(_GeomLike):
@property
def geom_dtype(self):
from datashader.geom import PointsDtype
return PointsDtype

@memoize
def _build_extend(self, x_mapper, y_mapper, info, append):
geometry_name = self.geometry

@ngjit
@self.expand_aggs_and_cols(append)
def _perform_extend_points(
i, j, sx, tx, sy, ty, xmin, xmax, ymin, ymax, flat_array, *aggs_and_cols
):
x = flat_array[j]
y = flat_array[j + 1]
# points outside bounds are dropped; remainder
# are mapped onto pixels
if (xmin <= x <= xmax) and (ymin <= y <= ymax):
xx = int(x_mapper(x) * sx + tx)
yy = int(y_mapper(y) * sy + ty)
xi, yi = (xx - 1 if x == xmax else xx,
yy - 1 if y == ymax else yy)

append(i, xi, yi, *aggs_and_cols)

@ngjit
@self.expand_aggs_and_cols(append)
def extend_cpu(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
flat_array, start_indices, *aggs_and_cols
):
n = len(start_indices)
m = len(flat_array)
for i in range(n):
start = start_indices[i]
stop = start_indices[i + 1] if i < n - 1 else m
for j in range(start, stop, 2):
_perform_extend_points(
i, j, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
flat_array, *aggs_and_cols
)

def extend(aggs, df, vt, bounds):
aggs_and_cols = aggs + info(df)
sx, tx, sy, ty = vt
xmin, xmax, ymin, ymax = bounds

geometry = df[geometry_name].array
flat_array = geometry.flat_array
start_indices = geometry.start_indices

extend_cpu(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
flat_array, start_indices, *aggs_and_cols
)

return extend
244 changes: 244 additions & 0 deletions datashader/glyphs/polygon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
from math import inf, isfinite, nan

from toolz import memoize
import numpy as np

from datashader.glyphs.line import _build_map_onto_pixel_for_line
from datashader.glyphs.points import _GeomLike
from datashader.utils import ngjit


class PolygonGeom(_GeomLike):
@property
def geom_dtype(self):
from datashader.geom import PolygonsDtype
return PolygonsDtype

@memoize
def _build_extend(self, x_mapper, y_mapper, info, append):
expand_aggs_and_cols = self.expand_aggs_and_cols(append)
map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
draw_segment = _build_draw_polygon(
append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols
)

perform_extend_cpu = _build_extend_polygon_geom(
draw_segment, expand_aggs_and_cols
)
geom_name = self.geometry

def extend(aggs, df, vt, bounds, plot_start=True):
sx, tx, sy, ty = vt
xmin, xmax, ymin, ymax = bounds
aggs_and_cols = aggs + info(df)
geom_array = df[geom_name].array
# line may be clipped, then mapped to pixels
perform_extend_cpu(
sx, tx, sy, ty,
xmin, xmax, ymin, ymax,
geom_array, *aggs_and_cols
)

return extend


def _build_draw_polygon(append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols):
@ngjit
@expand_aggs_and_cols
def draw_polygon(
i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
start_index, stop_index, flat, xs, ys, yincreasing, eligible,
*aggs_and_cols
):
"""Draw a polygon using a winding-number scan-line algorithm
"""
# Initialize values of pre-allocated buffers
xs.fill(nan)
ys.fill(nan)
yincreasing.fill(0)
eligible.fill(1)

# First pass, compute bounding box in data coordinates and count number of edges
num_edges = 0
poly_xmin = inf
poly_ymin = inf
poly_xmax = -inf
poly_ymax = -inf
for j in range(start_index, stop_index - 2, 2):
x = flat[j]
y = flat[j + 1]
if isfinite(x) and isfinite(y):
poly_xmin = min(poly_xmin, x)
poly_ymin = min(poly_ymin, y)
poly_xmax = max(poly_xmax, x)
poly_ymax = max(poly_ymax, y)
if isfinite(flat[j + 2]) and isfinite(flat[j + 3]):
# Valid edge
num_edges += 1

# skip polygon if outside viewport
if (poly_xmax < xmin or poly_xmin > xmax
or poly_ymax < ymin or poly_ymin > ymax):
return

# Compute pixel bounds for polygon
startxi, startyi = map_onto_pixel(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
max(poly_xmin, xmin), max(poly_ymin, ymin)
)
stopxi, stopyi = map_onto_pixel(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
min(poly_xmax, xmax), min(poly_ymax, ymax)
)
stopxi += 1
stopyi += 1

# Handle subpixel polygons (pixel width or height of polygon is 1)
if (stopxi - startxi) == 1 or (stopyi - startyi) == 1:
for yi in range(startyi, stopyi):
for xi in range(startxi, stopxi):
append(i, xi, yi, *aggs_and_cols)
return

# Build arrays of edges in canvas coordinates
ei = 0
for j in range(start_index, stop_index - 2, 2):
x0 = flat[j]
y0 = flat[j + 1]
x1 = flat[j + 2]
y1 = flat[j + 3]
if (isfinite(x0) and isfinite(y0) and
isfinite(y0) and isfinite(y1)):
# Map to canvas coordinates without rounding
x0c = x_mapper(x0) * sx + tx
y0c = y_mapper(y0) * sy + ty
x1c = x_mapper(x1) * sx + tx
y1c = y_mapper(y1) * sy + ty

if y1c > y0c:
xs[ei, 0] = x0c
ys[ei, 0] = y0c
xs[ei, 1] = x1c
ys[ei, 1] = y1c
yincreasing[ei] = 1
elif y1c < y0c:
xs[ei, 1] = x0c
ys[ei, 1] = y0c
xs[ei, 0] = x1c
ys[ei, 0] = y1c
yincreasing[ei] = -1
else:
# Skip horizontal edges
continue

ei += 1

# Perform scan-line algorithm
for yi in range(startyi, stopyi):
# All edges eligible at start of new row
eligible.fill(1)
for xi in range(startxi, stopxi):
# Init winding number
winding_number = 0
for ei in range(num_edges):
if eligible[ei] == 0:
# We've already determined that edge is above, below, or left
# of edge for the current pixel
continue

# Get edge coordinates.
# Note: y1c > y0c due to how xs/ys were populated
x0c = xs[ei, 0]
x1c = xs[ei, 1]
y0c = ys[ei, 0]
y1c = ys[ei, 1]

# Reject edges that are above, below, or left of current pixel.
# Note: Edge skipped if lower vertex overlaps,
# but is kept if upper vertex overlaps
if (y0c >= yi or y1c < yi
or (x0c < xi and x1c < xi)
):
# Edge not eligible for any remaining pixel in this row
eligible[ei] = 0
continue

if xi <= x0c and xi <= x1c:
# Edge is fully to the right of the pixel, so we know ray to the
# the right of pixel intersects edge.
winding_number += yincreasing[ei]
else:
# Now check if edge is to the right of pixel using cross product
# A is vector from pixel to first vertex
ax = x0c - xi
ay = y0c - yi

# B is vector from pixel to second vertex
bx = x1c - xi
by = y1c - yi

# Compute cross product of B and A
bxa = (bx * ay - by * ax)

if bxa < 0 or (bxa == 0 and yincreasing[ei]):
# Edge to the right
winding_number += yincreasing[ei]
else:
# Edge to left, not eligible for any remaining pixel in row
eligible[ei] = 0
continue

if winding_number != 0:
# If winding number is not zero, point
# is inside polygon
append(i, xi, yi, *aggs_and_cols)

return draw_polygon


def _build_extend_polygon_geom(
draw_polygon, expand_aggs_and_cols
):
def extend_cpu(
sx, tx, sy, ty, xmin, xmax, ymin, ymax, geom_array, *aggs_and_cols
):
start_i = geom_array.start_indices
flat = geom_array.flat_array

extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax, start_i, flat, *aggs_and_cols
)

@ngjit
@expand_aggs_and_cols
def extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax, start_i, flat, *aggs_and_cols
):
nrows = len(start_i)
flat_len = len(flat)

# Pre-allocate temp arrays
if len(start_i) > 1:
max_coordinates = max(np.diff(start_i))
else:
max_coordinates = 0
# Handle case where last polygon is the longest and divide by 2 to get max
# number of edges
max_edges = int(max(max_coordinates, len(flat) - start_i[-1]) // 2)

xs = np.full((max_edges, 2), nan, dtype=np.float32)
ys = np.full((max_edges, 2), nan, dtype=np.float32)
yincreasing = np.zeros(max_edges, dtype=np.int8)

# Initialize array indicating which edges are still eligible for processing
eligible = np.ones(max_edges, dtype=np.int8)

for i in range(nrows):
# Get x index range
start_index = start_i[i]
stop_index = (start_i[i + 1] if i < nrows - 1 else flat_len)
draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
int(start_index), int(stop_index), flat,
xs, ys, yincreasing, eligible, *aggs_and_cols)

return extend_cpu
163 changes: 102 additions & 61 deletions datashader/tests/test_dask.py

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions datashader/tests/test_geom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from math import inf
import numpy as np
from datashader.geom import (
Points, PointsArray, Lines, LinesArray, Polygons, PolygonsArray
)

unit_square_cw = np.array([1, 1, 1, 2, 2, 2, 2, 1, 1, 1], dtype='float64')
large_square_ccw = np.array([0, 0, 3, 0, 3, 3, 0, 3, 0, 0], dtype='float64')
hole_sep = np.array([-inf, -inf])
fill_sep = np.array([inf, inf])


def test_points():
points = Points(unit_square_cw)
assert points.length == 0.0
assert points.area == 0.0


def test_points_array():
points = PointsArray([
unit_square_cw,
large_square_ccw,
np.concatenate([large_square_ccw, hole_sep, unit_square_cw])
])

np.testing.assert_equal(points.length, [0.0, 0.0, 0.0])
np.testing.assert_equal(points.area, [0.0, 0.0, 0.0])


def test_lines():
lines = Lines(unit_square_cw)
assert lines.length == 4.0
assert lines.area == 0.0


def test_lines_array():
lines = LinesArray([
unit_square_cw,
large_square_ccw,
np.concatenate([large_square_ccw, hole_sep, unit_square_cw])
])

np.testing.assert_equal(lines.length, [4.0, 12.0, 16.0])
np.testing.assert_equal(lines.area, [0.0, 0.0, 0.0])


def test_polygons():
polygons = Polygons(np.concatenate([large_square_ccw, hole_sep, unit_square_cw]))
assert polygons.length == 16.0
assert polygons.area == 8.0


def test_polygons_array():
polygons = PolygonsArray([
large_square_ccw,
np.concatenate([large_square_ccw, hole_sep, unit_square_cw]),
unit_square_cw
])
np.testing.assert_equal(polygons.length, [12.0, 16.0, 4.0])
np.testing.assert_equal(polygons.area, [9.0, 8.0, -1.0])
152 changes: 97 additions & 55 deletions datashader/tests/test_pandas.py

Large diffs are not rendered by default.

198 changes: 198 additions & 0 deletions datashader/tests/test_polygons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import pytest
import pandas as pd
import numpy as np
import xarray as xr
from math import inf, nan
import datashader as ds
from datashader.tests.test_pandas import assert_eq_xr
import dask.dataframe as dd


def dask_DataFrame(*args, **kwargs):
return dd.from_pandas(pd.DataFrame(*args, **kwargs), npartitions=3)


DataFrames = [pd.DataFrame, dask_DataFrame]


@pytest.mark.parametrize('DataFrame', DataFrames)
def test_multipolygon_manual_range(DataFrame):
df = DataFrame({
'polygons': pd.Series([
[0, 0, 2, 0, 2, 2, 1, 3, 0, 0,
-inf, -inf, 1, 0.25, 1, 2, 1.75, .25, 0.25, 0.25,
inf, inf, 2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1
],
], dtype='Polygons[float64]'),
'v': [1]
})

cvs = ds.Canvas(plot_width=16, plot_height=16)
agg = cvs.polygons(df, geometry='polygons', agg=ds.count())

axis = ds.core.LinearAxis()
lincoords_x = axis.compute_index(
axis.compute_scale_and_translate((0., 4.), 16), 16)
lincoords_y = axis.compute_index(
axis.compute_scale_and_translate((0., 3.), 16), 16)

sol = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1],
[0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
], dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])

assert_eq_xr(agg, out)


@pytest.mark.parametrize('DataFrame', DataFrames)
def test_multiple_polygons_auto_range(DataFrame):
df = DataFrame({
'polygons': pd.Series([
[0, 0, 2, 0, 2, 2, 1, 3, 0, 0,
-inf, -inf, 1, 0.25, 1, 2, 1.75, .25, 0.25, 0.25,
inf, inf, 2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1
],
], dtype='Polygons[float64]'),
'v': [1]
})

cvs = ds.Canvas(plot_width=16, plot_height=16,
x_range=[-1, 3.5], y_range=[0.1, 2])
agg = cvs.polygons(df, geometry='polygons', agg=ds.count())

axis = ds.core.LinearAxis()
lincoords_x = axis.compute_index(
axis.compute_scale_and_translate((-1, 3.5), 16), 16)
lincoords_y = axis.compute_index(
axis.compute_scale_and_translate((0.1, 2), 16), 16)

sol = np.array([
[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1]
], dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])

assert_eq_xr(agg, out)


@pytest.mark.parametrize('DataFrame', DataFrames)
def test_no_overlap(DataFrame):
df = DataFrame({
'polygons': pd.Series([
[1, 1, 2, 2, 1, 3, 0, 2, 1, 1,
-inf, -inf,
0.5, 1.5, 0.5, 2.5, 1.5, 2.5, 1.5, 1.5, 0.5, 1.5],
[0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 0.5, 2.5, 0.5, 1.5],
[0, 1, 2, 1, 2, 3, 0, 3, 0, 1,
1, 1, 0, 2, 1, 3, 2, 2, 1, 1]
], dtype='Polygons[float64]'),
})

cvs = ds.Canvas(plot_width=16, plot_height=16)
agg = cvs.polygons(df, geometry='polygons', agg=ds.count())

axis = ds.core.LinearAxis()
lincoords_x = axis.compute_index(
axis.compute_scale_and_translate((0, 2), 16), 16)
lincoords_y = axis.compute_index(
axis.compute_scale_and_translate((1, 3), 16), 16)

sol = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
], dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])

assert_eq_xr(agg, out)


@pytest.mark.parametrize('DataFrame', DataFrames)
def test_no_overlap_agg(DataFrame):
df = DataFrame({
'polygons': pd.Series([
[1, 1, 2, 2, 1, 3, 0, 2, 1, 1,
-inf, -inf,
0.5, 1.5, 0.5, 2.5, 1.5, 2.5, 1.5, 1.5, 0.5, 1.5],
[0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 0.5, 2.5, 0.5, 1.5],
[0, 1, 2, 1, 2, 3, 0, 3, 0, 1,
1, 1, 0, 2, 1, 3, 2, 2, 1, 1]
], dtype='Polygons[float64]'),
'v': range(3)
})

cvs = ds.Canvas(plot_width=16, plot_height=16)
agg = cvs.polygons(df, geometry='polygons', agg=ds.sum('v'))

axis = ds.core.LinearAxis()
lincoords_x = axis.compute_index(
axis.compute_scale_and_translate((0, 2), 16), 16)
lincoords_y = axis.compute_index(
axis.compute_scale_and_translate((1, 3), 16), 16)

sol = np.array([
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
[nan, 2., 2., 2., 2., 2., 2., 2., 0., 0., 2., 2., 2., 2., 2., 2.],
[nan, 2., 2., 2., 2., 2., 2., 0., 0., 0., 0., 2., 2., 2., 2., 2.],
[nan, 2., 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2.],
[nan, 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2.],
[nan, 2., 2., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 2., 2.],
[nan, 2., 2., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 2.],
[nan, 2., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
[nan, 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
[nan, 2., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
[nan, 2., 2., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 2.],
[nan, 2., 2., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 2., 2.],
[nan, 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2.],
[nan, 2., 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2.],
[nan, 2., 2., 2., 2., 2., 2., 0., 0., 0., 0., 2., 2., 2., 2., 2.],
[nan, 2., 2., 2., 2., 2., 2., 2., 0., 0., 2., 2., 2., 2., 2., 2.]
])

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])
assert_eq_xr(agg, out)