diff --git a/siuba/sql/translate.py b/siuba/sql/translate.py index 43213c65..5afd1ced 100644 --- a/siuba/sql/translate.py +++ b/siuba/sql/translate.py @@ -2,6 +2,7 @@ from sqlalchemy.sql import sqltypes as types from functools import singledispatch from .verbs import case_when, if_else +import warnings # TODO: must make these take both tbl, col as args, since hard to find window funcs def sa_is_window(clause): @@ -44,6 +45,13 @@ class CumlOver(Over, CustomOverClause): def set_over(self, group_by, order_by): self.partition_by = group_by self.order_by = order_by + + if not len(order_by): + warnings.warn( + "No order by columns explicitly set in window function. SQL engine" + "does not guarantee a row ordering. Recommend using an arrange beforehand.", + RuntimeWarning + ) return self diff --git a/siuba/sql/verbs.py b/siuba/sql/verbs.py index adf21ce7..4dc647f4 100644 --- a/siuba/sql/verbs.py +++ b/siuba/sql/verbs.py @@ -382,9 +382,10 @@ def _arrange(__data, *args): for expr in args ) - sort_cols = _create_order_by_clause(cols, *args) + sort_cols = _create_order_by_clause(cols, *new_calls) - return __data.append_op(last_op.order_by(*sort_cols), order_by = new_calls) + order_by = __data.order_by + new_calls + return __data.append_op(last_op.order_by(*sort_cols), order_by = order_by) # TODO: consolidate / pull expr handling funcs into own file? @@ -492,8 +493,7 @@ def _summarize(__data, **kwargs): sel.append_column(col) # TODO: is a simple method on __data for doing this... - new_data = __data.append_op(sel) - new_data.group_by = tuple() + new_data = __data.append_op(sel, group_by = tuple(), order_by = tuple()) return new_data diff --git a/siuba/tests/test_verb_filter.py b/siuba/tests/test_verb_filter.py index 395ce228..52fe12be 100644 --- a/siuba/tests/test_verb_filter.py +++ b/siuba/tests/test_verb_filter.py @@ -5,7 +5,7 @@ """ from siuba import _, filter, group_by, arrange -from siuba.dply.vector import row_number +from siuba.dply.vector import row_number, desc import pandas as pd import pytest @@ -61,3 +61,13 @@ def test_filter_via_group_by_arrange(backend): data_frame(g = [1, 2, 2], x = [3, 3, 4]) ) +@backend_notimpl("sqlite") +def test_filter_via_group_by_desc_arrange(backend): + dfs = backend.load_df(x = [3,2,1] + [2,3,4], g = [1]*3 + [2]*3) + + assert_equal_query( + dfs, + group_by(_.g) >> arrange(desc(_.x)) >> filter(_.x.cumsum() > 3), + data_frame(g = [1, 1, 2, 2, 2], x = [2, 1, 4, 3, 2]) + ) + diff --git a/siuba/tests/test_verb_mutate.py b/siuba/tests/test_verb_mutate.py index 7572b54f..7c7e5d36 100644 --- a/siuba/tests/test_verb_mutate.py +++ b/siuba/tests/test_verb_mutate.py @@ -81,11 +81,14 @@ def test_mutate_using_agg_expr(backend): def test_mutate_using_cuml_agg(backend): data = data_frame(x = range(1, 5), g = [1,1,2,2]) dfs = backend.load_df(data) - assert_equal_query( - dfs, - group_by(_.g) >> mutate(y = _.x.cumsum()), - data.assign(y = [1.0, 3, 3, 7]) - ) + + # cuml window without arrange before generates warning + with pytest.warns(None): + assert_equal_query( + dfs, + group_by(_.g) >> mutate(y = _.x.cumsum()), + data.assign(y = [1.0, 3, 3, 7]) + ) @pytest.mark.skip("TODO: mutate not preserving var order (#42)") def test_mutate_overwrites_prev(backend): diff --git a/siuba/tests/test_verb_summarize.py b/siuba/tests/test_verb_summarize.py index 6238fe16..21a0fc91 100644 --- a/siuba/tests/test_verb_summarize.py +++ b/siuba/tests/test_verb_summarize.py @@ -66,7 +66,6 @@ def test_summarize_no_same_call_var_refs(df): df >> summarize(y = _.x.min(), z = _.y + 1) -@pytest.mark.skip("TODO (see #30)") def test_summarize_removes_order_vars(df): lazy_tbl = df >> summarize(n = n(_))