Skip to content

Commit

Permalink
full order by support with arrange; closes #30
Browse files Browse the repository at this point in the history
  • Loading branch information
machow committed May 20, 2019
1 parent d372f15 commit 0c4591f
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 11 deletions.
8 changes: 8 additions & 0 deletions siuba/sql/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sqlalchemy.sql import sqltypes as types
from functools import singledispatch
from .verbs import case_when, if_else
import warnings

# TODO: must make these take both tbl, col as args, since hard to find window funcs
def sa_is_window(clause):
Expand Down Expand Up @@ -44,6 +45,13 @@ class CumlOver(Over, CustomOverClause):
def set_over(self, group_by, order_by):
self.partition_by = group_by
self.order_by = order_by

if not len(order_by):
warnings.warn(
"No order by columns explicitly set in window function. SQL engine"
"does not guarantee a row ordering. Recommend using an arrange beforehand.",
RuntimeWarning
)
return self


Expand Down
8 changes: 4 additions & 4 deletions siuba/sql/verbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,10 @@ def _arrange(__data, *args):
for expr in args
)

sort_cols = _create_order_by_clause(cols, *args)
sort_cols = _create_order_by_clause(cols, *new_calls)

return __data.append_op(last_op.order_by(*sort_cols), order_by = new_calls)
order_by = __data.order_by + new_calls
return __data.append_op(last_op.order_by(*sort_cols), order_by = order_by)


# TODO: consolidate / pull expr handling funcs into own file?
Expand Down Expand Up @@ -492,8 +493,7 @@ def _summarize(__data, **kwargs):
sel.append_column(col)

# TODO: is a simple method on __data for doing this...
new_data = __data.append_op(sel)
new_data.group_by = tuple()
new_data = __data.append_op(sel, group_by = tuple(), order_by = tuple())
return new_data


Expand Down
12 changes: 11 additions & 1 deletion siuba/tests/test_verb_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

from siuba import _, filter, group_by, arrange
from siuba.dply.vector import row_number
from siuba.dply.vector import row_number, desc
import pandas as pd

import pytest
Expand Down Expand Up @@ -61,3 +61,13 @@ def test_filter_via_group_by_arrange(backend):
data_frame(g = [1, 2, 2], x = [3, 3, 4])
)

@backend_notimpl("sqlite")
def test_filter_via_group_by_desc_arrange(backend):
dfs = backend.load_df(x = [3,2,1] + [2,3,4], g = [1]*3 + [2]*3)

assert_equal_query(
dfs,
group_by(_.g) >> arrange(desc(_.x)) >> filter(_.x.cumsum() > 3),
data_frame(g = [1, 1, 2, 2, 2], x = [2, 1, 4, 3, 2])
)

13 changes: 8 additions & 5 deletions siuba/tests/test_verb_mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ def test_mutate_using_agg_expr(backend):
def test_mutate_using_cuml_agg(backend):
data = data_frame(x = range(1, 5), g = [1,1,2,2])
dfs = backend.load_df(data)
assert_equal_query(
dfs,
group_by(_.g) >> mutate(y = _.x.cumsum()),
data.assign(y = [1.0, 3, 3, 7])
)

# cuml window without arrange before generates warning
with pytest.warns(None):
assert_equal_query(
dfs,
group_by(_.g) >> mutate(y = _.x.cumsum()),
data.assign(y = [1.0, 3, 3, 7])
)

@pytest.mark.skip("TODO: mutate not preserving var order (#42)")
def test_mutate_overwrites_prev(backend):
Expand Down
1 change: 0 additions & 1 deletion siuba/tests/test_verb_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def test_summarize_no_same_call_var_refs(df):
df >> summarize(y = _.x.min(), z = _.y + 1)


@pytest.mark.skip("TODO (see #30)")
def test_summarize_removes_order_vars(df):
lazy_tbl = df >> summarize(n = n(_))

Expand Down

0 comments on commit 0c4591f

Please sign in to comment.