Fix dynspread not to count already-spread pixels and respect array bounds (#1001)

jbednar · web-flow · commit 1ae52b65ec8a · 2021-06-05T15:55:59.000-05:00
* Fix dynspread so that it does not count already-spread pixels and respects array bounds
* Defined the dynspread density as the fraction of non-empty pixels that have neighbors
* Updated tests to match new behavior
diff --git a/datashader/tests/test_transfer_functions.py b/datashader/tests/test_transfer_functions.py
@@ -912,96 +912,113 @@ def test_rgb_density():
     assert tf._rgb_density(data) == 1.0
     data = np.zeros((4, 4), dtype='uint32')
     assert tf._rgb_density(data) == np.inf
-    data[2, 2] = b
+    data[3, 3] = b
     assert tf._rgb_density(data) == 0
-    data[2, 1] = data[1, 2] = data[1, 1] = b
-    assert np.allclose(tf._rgb_density(data), 3./8.)
+    data[2, 0] = data[0, 2] = data[1, 1] = b
+    assert np.allclose(tf._rgb_density(data), 0.75)
+    assert np.allclose(tf._rgb_density(data, 3), 1)
 
 def test_int_array_density():
     data = np.ones((4, 4), dtype='uint32')
     assert tf._array_density(data, float_type=False) == 1.0
     data = np.zeros((4, 4), dtype='uint32')
     assert tf._array_density(data, float_type=False) == np.inf
-    data[2, 2] = 1
+    data[3, 3] = 1
     assert tf._array_density(data, float_type=False) == 0
-    data[2, 1] = data[1, 2] = data[1, 1] = 1
-    assert np.allclose(tf._array_density(data, float_type=False), 3./8.)
+    data[2, 0] = data[0, 2] = data[1, 1] = 1
+    assert np.allclose(tf._array_density(data, float_type=False), 0.75)
+    assert np.allclose(tf._array_density(data, float_type=False, px=3), 1)
 
+    
 def test_float_array_density():
     data = np.ones((4, 4), dtype='float32')
     assert tf._array_density(data, float_type=True) == 1.0
     data = np.full((4, 4), np.nan, dtype='float32')
     assert tf._array_density(data, float_type=True) == np.inf
-    data[2, 2] = 1
+    data[3, 3] = 1
     assert tf._array_density(data, float_type=True) == 0
-    data[2, 1] = data[1, 2] = data[1, 1] = 1
-    assert np.allclose(tf._array_density(data, float_type=True), 3./8.)
-
+    data[2, 0] = data[0, 2] = data[1, 1] = 1
+    assert np.allclose(tf._array_density(data, float_type=True), 0.75)
+    assert np.allclose(tf._array_density(data, float_type=True, px=3), 1)
+    
 
 def test_rgb_dynspread():
     b = 0xffff0000
+    coords = [np.arange(5), np.arange(5)]
     data = np.array([[b, b, 0, 0, 0],
                      [b, b, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 0, 0, b, 0],
                      [0, 0, 0, 0, 0]], dtype='uint32')
-    coords = [np.arange(5), np.arange(5)]
     img = tf.Image(data, coords=coords, dims=dims)
-    assert tf.dynspread(img).equals(tf.spread(img, 1))
-    assert tf.dynspread(img, threshold=0.9).equals(tf.spread(img, 2))
-    assert tf.dynspread(img, threshold=0).equals(img)
+    assert tf.dynspread(img).equals(img)
+    data = np.array([[b, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0],
+                     [b, 0, 0, 0, b],
+                     [0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0]], dtype='uint32')
+    img = tf.Image(data, coords=coords, dims=dims)
+    assert tf.dynspread(img, threshold=0.4).equals(tf.spread(img, 0))
+    assert tf.dynspread(img, threshold=0.7).equals(tf.spread(img, 1))
+    assert tf.dynspread(img, threshold=1.0).equals(tf.spread(img, 3))
     assert tf.dynspread(img, max_px=0).equals(img)
 
     pytest.raises(ValueError, lambda: tf.dynspread(img, threshold=1.1))
     pytest.raises(ValueError, lambda: tf.dynspread(img, max_px=-1))
 
 def test_array_dynspread():
+    coords = [np.arange(5), np.arange(5)]
     data = np.array([[1, 1, 0, 0, 0],
                      [1, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0]], dtype='uint32')
-    coords = [np.arange(5), np.arange(5)]
     arr = xr.DataArray(data, coords=coords, dims=dims)
-    assert tf.dynspread(arr).equals(tf.spread(arr, 1))
-    assert tf.dynspread(arr, threshold=0.9).equals(tf.spread(arr, 2))
-    assert tf.dynspread(arr, threshold=0).equals(arr)
+    assert tf.dynspread(arr).equals(arr)
+    data = np.array([[1, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0],
+                     [1, 0, 0, 0, 1],
+                     [0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0]], dtype='uint32')
+    arr = xr.DataArray(data, coords=coords, dims=dims)
+    assert tf.dynspread(arr, threshold=0.4).equals(tf.spread(arr, 0))
+    assert tf.dynspread(arr, threshold=0.7).equals(tf.spread(arr, 1))
+    assert tf.dynspread(arr, threshold=1.0).equals(tf.spread(arr, 3))
     assert tf.dynspread(arr, max_px=0).equals(arr)
 
     pytest.raises(ValueError, lambda: tf.dynspread(arr, threshold=1.1))
     pytest.raises(ValueError, lambda: tf.dynspread(arr, max_px=-1))
 
 
 def test_categorical_dynspread():
-    a_data = np.array([[0, 1, 0, 0, 0],
+    a_data = np.array([[1, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0]], dtype='int32')
 
     b_data = np.array([[0, 0, 0, 0, 0],
-                       [0, 1, 0, 0, 0],
                        [0, 0, 0, 0, 0],
+                       [1, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0]], dtype='int32')
 
-    c_data = np.array([[1, 0, 0, 0, 0],
-                       [1, 0, 0, 0, 0],
+    c_data = np.array([[0, 0, 0, 0, 0],
+                       [0, 0, 0, 0, 0],
+                       [0, 0, 0, 0, 1],
                        [0, 0, 0, 0, 0],
-                       [0, 0, 0, 1, 0],
                        [0, 0, 0, 0, 0]], dtype='int32')
 
     data = np.dstack([a_data, b_data, c_data])
     coords = [np.arange(5), np.arange(5)]
     arr = xr.DataArray(data, coords=coords + [['a', 'b', 'c']],
                        dims=dims + ['cat'])
-    assert tf.dynspread(arr).equals(tf.spread(arr, 1))
-    assert tf.dynspread(arr, threshold=0.9).equals(tf.spread(arr, 2))
-    assert tf.dynspread(arr, threshold=0).equals(arr)
+    assert tf.dynspread(arr, threshold=0.4).equals(tf.spread(arr, 0))
+    assert tf.dynspread(arr, threshold=0.7).equals(tf.spread(arr, 1))
+    assert tf.dynspread(arr, threshold=1.0).equals(tf.spread(arr, 3))
     assert tf.dynspread(arr, max_px=0).equals(arr)
 
 
-
 def check_eq_hist_cdf_slope(eq):
     # Check that the slope of the cdf is ~1
     # Adapted from scikit-image's test for the same function
diff --git a/datashader/transfer_functions/__init__.py b/datashader/transfer_functions/__init__.py
@@ -744,60 +744,70 @@ def dynspread(img, threshold=0.5, max_px=3, shape='circle', how=None, name=None)
         raise ValueError("max_px must be >= 0")
     # Simple linear search. Not super efficient, but max_px is usually small.
     float_type = img.dtype in [np.float32, np.float64]
-    for px in range(max_px + 1):
-        out = spread(img, px, shape=shape, how=how, name=name)
+    px_=0
+    for px in range(1, max_px + 1):
+        px_=px
         if is_image:
-            density = _rgb_density(out.data)
+            density = _rgb_density(img.data, px*2)
         elif len(img.shape) == 2:
-            density = _array_density(out.data, float_type)
+            density = _array_density(img.data, float_type, px*2)
         else:
-            masked = np.logical_not(np.isnan(out)) if float_type else (out != 0)
+            masked = np.logical_not(np.isnan(img)) if float_type else (img != 0)
             flat_mask = np.sum(masked, axis=2, dtype='uint32')
-            density = _array_density(flat_mask.data, False)
-        if density >= threshold:
+            density = _array_density(flat_mask.data, False, px*2)
+        if density > threshold:
+            px_=px_-1
             break
-
-    return out
+        
+    if px_>=1:
+        return spread(img, px_, shape=shape, how=how, name=name)
+    else:
+        return img
 
 
 @nb.jit(nopython=True, nogil=True, cache=True)
-def _array_density(arr, float_type):
+def _array_density(arr, float_type, px=1):
     """Compute a density heuristic of an array.
 
     The density is a number in [0, 1], and indicates the normalized mean number
-    of non-empty adjacent pixels for each non-empty pixel.
+    of non-empty pixels that have neighbors in the given px radius.
     """
     M, N = arr.shape
-    cnt = total = 0
-    for y in range(1, M - 1):
-        for x in range(1, N - 1):
+    cnt = has_neighbors = 0
+    for y in range(0, M):
+        for x in range(0, N):
             el = arr[y, x]
             if (float_type and not np.isnan(el)) or (not float_type and el!=0):
                 cnt += 1
-                for i in range(y - 1, y + 2):
-                    for j in range(x - 1, x + 2):
-                        if float_type and not np.isnan(arr[i, j]):
-                            total += 1
-                        elif not float_type and arr[i, j] != 0:
-                            total += 1
-    return (total - cnt)/(cnt * 8) if cnt else np.inf
+                neighbors = 0
+                for i in     range(max(0, y - px), min(y + px + 1, M)):
+                    for j in range(max(0, x - px), min(x + px + 1, N)):
+                        if ((float_type and not np.isnan(arr[i, j])) or
+                            (not float_type and arr[i, j] != 0)):
+                            neighbors += 1
+                if neighbors>1: # (excludes self)
+                    has_neighbors += 1
+    return has_neighbors/cnt if cnt else np.inf
 
 
 @nb.jit(nopython=True, nogil=True, cache=True)
-def _rgb_density(arr):
+def _rgb_density(arr, px=1):
     """Compute a density heuristic of an image.
 
     The density is a number in [0, 1], and indicates the normalized mean number
-    of non-empty adjacent pixels for each non-empty pixel.
+    of non-empty pixels that have neighbors in the given px radius.
     """
     M, N = arr.shape
-    cnt = total = 0
-    for y in range(1, M - 1):
-        for x in range(1, N - 1):
+    cnt = has_neighbors = 0
+    for y in range(0, M):
+        for x in range(0, N):
             if (arr[y, x] >> 24) & 255:
                 cnt += 1
-                for i in range(y - 1, y + 2):
-                    for j in range(x - 1, x + 2):
+                neighbors = 0
+                for i in     range(max(0, y - px), min(y + px + 1, M)):
+                    for j in range(max(0, x - px), min(x + px + 1, N)):
                         if (arr[i, j] >> 24) & 255:
-                            total += 1
-    return (total - cnt)/(cnt * 8) if cnt else np.inf
+                            neighbors += 1
+                if neighbors>1: # (excludes self)
+                    has_neighbors += 1
+    return has_neighbors/cnt if cnt else np.inf
diff --git a/examples/getting_started/3_Interactivity.ipynb b/examples/getting_started/3_Interactivity.ipynb
@@ -181,7 +181,7 @@
    "outputs": [],
    "source": [
     "datashaded = hd.datashade(points, aggregator=ds.count_cat('cat')).redim.range(x=(-5,5),y=(-5,5))\n",
-    "hd.dynspread(datashaded, threshold=0.50, how='over').opts(height=500,width=500)"
+    "hd.dynspread(datashaded, threshold=0.8, how='over', max_px=5).opts(height=500,width=500)"
    ]
   },
   {
@@ -214,7 +214,7 @@
     "gaussspread = hd.dynspread(datashaded, threshold=0.50, how='over').opts(plot=dict(height=400,width=400))\n",
     "\n",
     "color_key = [(name,color) for name,color in zip([\"d1\",\"d2\",\"d3\",\"d4\",\"d5\"], Sets1to3)]\n",
-    "color_points = hv.NdOverlay({n: hv.Points([0,0], label=str(n)).opts(style=dict(color=c)) for n,c in color_key})\n",
+    "color_points = hv.NdOverlay({n: hv.Points([0,0], label=str(n)).opts(color=c,size=0) for n,c in color_key})\n",
     "\n",
     "color_points * gaussspread"
    ]
diff --git a/examples/user_guide/4_Trajectories.ipynb b/examples/user_guide/4_Trajectories.ipynb
@@ -150,7 +150,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from holoviews.operation.datashader import datashade\n",
+    "from holoviews.operation.datashader import datashade, spread\n",
     "import holoviews as hv\n",
     "hv.extension('bokeh')"
    ]
@@ -168,8 +168,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "opts = hv.opts.RGB(width=500, height=500)\n",
-    "datashade(hv.Path(df, kdims=['x','y']), normalization='linear', aggregator=ds.any()).opts(opts)"
+    "opts = hv.opts.RGB(width=900, height=500, aspect='equal')\n",
+    "spread(datashade(hv.Path(df, kdims=['x','y']), normalization='linear', aggregator=ds.any())).opts(opts)"
    ]
   },
   {
diff --git a/examples/user_guide/7_Networks.ipynb b/examples/user_guide/7_Networks.ipynb
@@ -414,7 +414,7 @@
     "\n",
     "circle = hv.Graph(edges, label='Bokeh edges').opts(node_size=5)\n",
     "hnodes = circle.nodes.opts(size=5)\n",
-    "dscirc = (hd.dynspread(hd.datashade(circle))*hnodes).relabel(\"Datashader edges\")\n",
+    "dscirc = (hd.spread(hd.datashade(circle))*hnodes).relabel(\"Datashader edges\")\n",
     "\n",
     "circle + dscirc"
    ]

Original file line number	Diff line number	Diff line change
`@@ -181,7 +181,7 @@`
`181`	`181`	`"outputs": [],`
`182`	`182`	`"source": [`
`183`	`183`	`"datashaded = hd.datashade(points, aggregator=ds.count_cat('cat')).redim.range(x=(-5,5),y=(-5,5))\n",`
`184`		`- "hd.dynspread(datashaded, threshold=0.50, how='over').opts(height=500,width=500)"`
	`184`	`+ "hd.dynspread(datashaded, threshold=0.8, how='over', max_px=5).opts(height=500,width=500)"`
`185`	`185`	`]`
`186`	`186`	`},`
`187`	`187`	`{`
`@@ -214,7 +214,7 @@`
`214`	`214`	`"gaussspread = hd.dynspread(datashaded, threshold=0.50, how='over').opts(plot=dict(height=400,width=400))\n",`
`215`	`215`	`"\n",`
`216`	`216`	`"color_key = [(name,color) for name,color in zip([\"d1\",\"d2\",\"d3\",\"d4\",\"d5\"], Sets1to3)]\n",`
`217`		`- "color_points = hv.NdOverlay({n: hv.Points([0,0], label=str(n)).opts(style=dict(color=c)) for n,c in color_key})\n",`
	`217`	`+ "color_points = hv.NdOverlay({n: hv.Points([0,0], label=str(n)).opts(color=c,size=0) for n,c in color_key})\n",`
`218`	`218`	`"\n",`
`219`	`219`	`"color_points * gaussspread"`
`220`	`220`	`]`
Original file line number	Diff line number	Diff line change
`@@ -150,7 +150,7 @@`
`150`	`150`	`"metadata": {},`
`151`	`151`	`"outputs": [],`
`152`	`152`	`"source": [`
`153`		`- "from holoviews.operation.datashader import datashade\n",`
	`153`	`+ "from holoviews.operation.datashader import datashade, spread\n",`
`154`	`154`	`"import holoviews as hv\n",`
`155`	`155`	`"hv.extension('bokeh')"`
`156`	`156`	`]`
`@@ -168,8 +168,8 @@`
`168`	`168`	`"metadata": {},`
`169`	`169`	`"outputs": [],`
`170`	`170`	`"source": [`
`171`		`- "opts = hv.opts.RGB(width=500, height=500)\n",`
`172`		`- "datashade(hv.Path(df, kdims=['x','y']), normalization='linear', aggregator=ds.any()).opts(opts)"`
	`171`	`+ "opts = hv.opts.RGB(width=900, height=500, aspect='equal')\n",`
	`172`	`+ "spread(datashade(hv.Path(df, kdims=['x','y']), normalization='linear', aggregator=ds.any())).opts(opts)"`
`173`	`173`	`]`
`174`	`174`	`},`
`175`	`175`	`{`
Original file line number	Diff line number	Diff line change
`@@ -414,7 +414,7 @@`
`414`	`414`	`"\n",`
`415`	`415`	`"circle = hv.Graph(edges, label='Bokeh edges').opts(node_size=5)\n",`
`416`	`416`	`"hnodes = circle.nodes.opts(size=5)\n",`
`417`		`- "dscirc = (hd.dynspread(hd.datashade(circle))*hnodes).relabel(\"Datashader edges\")\n",`
	`417`	`+ "dscirc = (hd.spread(hd.datashade(circle))*hnodes).relabel(\"Datashader edges\")\n",`
`418`	`418`	`"\n",`
`419`	`419`	`"circle + dscirc"`
`420`	`420`	`]`