Merge pull request #103 from machow/feat-case-when-call-vals

machow · web-flow · commit 2eb7cb206c17 · 2019-08-06T00:37:03.000-04:00
Feat case when call vals
diff --git a/siuba/dply/verbs.py b/siuba/dply/verbs.py
@@ -709,21 +709,40 @@ def _if_else(cond, true_vals, false_vals):
 # TODO: evaluate this non-table verb approach
 from siuba.siu import DictCall
 
+def _val_call(call, data, n, indx = None):
+    """Return non-callables as-is; else call(data), checked to be shape (n,), taken at positions indx if given."""
+    if not callable(call):
+        return call
+    arr = call(data)
+    if arr.shape != (n,):
+        raise ValueError("Expected call to return array of shape {} "
+                         "but it returned shape {}".format((n,), arr.shape))
+    # indx holds positions (from np.where); index positionally, not by pandas label
+    return np.asarray(arr)[indx] if indx is not None else arr
+
+
 @singledispatch2((pd.DataFrame,pd.Series))
 def case_when(__data, cases):
     if isinstance(cases, Call):
         cases = cases(__data)
     # TODO: handle when receive list of (k,v) pairs for py < 3.5 compat?
-    out = np.repeat(None, len(__data))
-    for k, v in reversed(list(cases.items())):
+
+    stripped_cases = {strip_symbolic(k): strip_symbolic(v) for k,v in cases.items()}
+    n = len(__data)
+    out = np.repeat(None, n)  # object array; rows that no case matches stay None
+    for k, v in reversed(list(stripped_cases.items())):  # reversed, so earlier cases overwrite later ones
         if callable(k):
-            result = k(__data)
+            result = _val_call(k, __data, n)  # condition must evaluate to shape (n,)
             indx = np.where(result)[0]
-            out[indx] = v
+
+            val_res = _val_call(v, __data, n, indx)  # literal passes through; callable is evaluated, then subset to indx
+            out[indx] = val_res
         elif k:
             # e.g. k is just True, etc..
-            out[:] = v
+            val_res = _val_call(v, __data, n)  # no indx: value applies to every row
+            out[:] = val_res
 
+    # rebuilding from a list lets numpy infer a dtype rather than keeping the object array
     return np.array(list(out))
 
 @case_when.register(Symbolic)
diff --git a/siuba/tests/test_verb_case_when.py b/siuba/tests/test_verb_case_when.py
@@ -0,0 +1,45 @@
+import pandas as pd
+import numpy as np
+import pytest
+
+from siuba.dply.verbs import case_when
+from numpy.testing import assert_equal
+from siuba.siu import _
+
+DATA = pd.DataFrame({  # shared three-row frame; the data fixture returns a copy of it
+    'x': [0,1,2],
+    'y': [10, 11, 12]
+    })
+
+
+@pytest.fixture
+def data():
+    return DATA.copy()  # copy, so a test cannot modify the module-level DATA
+
+
+@pytest.mark.parametrize("k,v, res", [
+    (True, 1,     [1]*3),  # truthy literal condition: value fills every row
+    (True, False, [False]*3),
+    (True, _.y,   [10, 11, 12]),  # value given as a siu expression
+    (True, lambda _: _.y, [10, 11, 12]),  # value given as a plain callable
+    (_.x < 2, 0,  [0, 0, None]),  # rows failing the condition stay None
+    (_.x < 2, "small", ["small", "small", None]),
+    (_.x < 2, _.y, [10, 11, None]),
+    (lambda _: _.x < 2, 0,  [0, 0, None]),
+    # disabled: (np.array([True, True, False]), 0, [0, 0, None]) -- NOTE(review): presumably because an ndarray is unhashable and k is used as a dict key below; confirm
+    ])
+def test_case_when_single_cond(k, v, res, data):
+    arr_res = np.array(res)
+    out = case_when(data, {k: v})  # single-case dict built from the parametrized pair
+
+    assert_equal(out, arr_res)
+
+
+def test_case_when_cond_order(data):
+    out = case_when(data, {
+        lambda _: _.x < 2  :  0,  # listed first: expected to win on the rows it matches
+        True               : 999  # catch-all: expected only on the remaining rows
+        })
+
+    assert_equal(out, np.array([0, 0, 999]))
+