machow
diff --git a/‎.github/workflows/ci.yml
+6-6 b/‎.github/workflows/ci.yml
+6-6
diff --git a/‎docs/developer/backend_sql.Rmd
+33-11 b/‎docs/developer/backend_sql.Rmd
+33-11
diff --git a/‎setup.py
+4-2 b/‎setup.py
+4-2
diff --git a/‎siuba/__init__.py
+1-1 b/‎siuba/__init__.py
+1-1
diff --git a/‎siuba/dply/verbs.py
+7 b/‎siuba/dply/verbs.py
+7
diff --git a/‎siuba/experimental/pd_groups/dialect.py
+6-1 b/‎siuba/experimental/pd_groups/dialect.py
+6-1
diff --git a/‎siuba/experimental/pd_groups/test_pd_groups.py
+2-2 b/‎siuba/experimental/pd_groups/test_pd_groups.py
+2-2
diff --git a/‎siuba/ops/__init__.py
+9 b/‎siuba/ops/__init__.py
+9
diff --git a/‎siuba/ops/support/base.py
+43-10 b/‎siuba/ops/support/base.py
+43-10
@@ -1,7 +1,9 @@
 name: CI
 
 on:
+  workflow_dispatch:
   push:
+    branches: ['main', 'dev-*']
   pull_request:
   release:
     types: [published]
@@ -13,15 +15,10 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.7, 3.8]
         requirements: ['-r requirements.txt']
         include:
           # historical requirements
-          - name: "2020-early dependencies"
-            requirements: numpy==1.17.4 pandas~=0.25.3 SQLAlchemy~=1.3.11 psycopg2~=2.8.4 PyMySQL==1.0.2
-            pytest_flags: --ignore=siuba/dply/forcats.py siuba
-            python-version: 3.6
-          # current
           - name: "2020-mid dependencies"
             python-version: 3.8
             requirements: numpy~=1.19.1 pandas~=1.1.0 SQLAlchemy~=1.3.18 psycopg2~=2.8.5 PyMySQL==1.0.2
@@ -52,6 +49,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install $REQUIREMENTS
           python -m pip install -r requirements-test.txt
+          python -m pip install snowflake-sqlalchemy==1.3.3
           python -m pip install .
         env:
           REQUIREMENTS: ${{ matrix.requirements }}
@@ -61,6 +59,8 @@ jobs:
         env:
           SB_TEST_PGPORT: 5432
           PYTEST_FLAGS: ${{ matrix.pytest_flags }}
+          SB_TEST_SNOWFLAKEPASSWORD: ${{ secrets.SB_TEST_SNOWFLAKEPASSWORD }}
+          SB_TEST_SNOWFLAKEHOST: ${{ secrets.SB_TEST_SNOWFLAKEHOST }}
 
       # optional step for running bigquery tests ----
       - name: Set up Cloud SDK
 
@@ -5,20 +5,26 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.4.2
+      jupytext_version: 1.13.7
   kernelspec:
-    display_name: Python 3
+    display_name: Python 3 (ipykernel)
     language: python
     name: python3
 ---
 
 ```{python nbsphinx=hidden}
 import pandas as pd
 pd.set_option("display.max_rows", 5)
+
+from siuba.siu.format import Formatter
+
+show_tree = lambda x: print(Formatter().format(x))
 ```
 
 # SQL backend
 
+> ⚠️: This document is being revised (though the code runs correctly!).
+
 
 ## Step 1: Column Translation
 
@@ -28,7 +34,8 @@ Column translation requires three pieces:
 1. **Locals:** Functions for creating the sqlalchemy clause corresponding to an 
    operation.
 2. **Column Data:** Classes representing columns under normal and aggregate settings.   
-3. **Translator:** A class that can take a symbolic expression (e.g. `_.x.mean()`) and return the correct sqlachemy clause.
+3. **Translator:** A class that can take a symbolic expression (e.g. `_.x.mean()`) and return it in call form: `mean(_.x)`.
+4. **Codata visitor:** A class that takes the call above and swaps in the SQL dialect's version of each call.
 
 
 ```{python}
@@ -77,9 +84,10 @@ aggregation = {
 from siuba.sql.translate import SqlTranslator
 
 translator = SqlTranslator.from_mappings(
-    scalar, window, aggregation,
     WowSqlColumn, WowSqlColumnAgg
 )
+
+# TODO: how to work in codata visitor?
 ```
 
 ## Column Data
@@ -96,7 +104,7 @@ The entries of each local dictionary are functions that take a sqlalchemy.sql.Cl
 ```{python}
 from sqlalchemy import sql
 
-expr_rank = window["rank"](sql.column("a_col"))
+expr_rank = window["rank"](WowSqlColumn(), sql.column("a_col"))
 expr_rank
 ```
 
@@ -111,6 +119,8 @@ Below, we set up a sqlalchemy select statement in order to demonstrate the trans
 
 ```{python}
 from siuba import _
+
+
 from sqlalchemy.sql import column, select
 
 sel = select([column('x'), column('y')])
@@ -120,26 +130,38 @@ Then we feed the columns to the translated call.
 
 ```{python}
 call_add = translator.translate(_.x + _.y)
-call_add(sel.columns)
+
+show_tree(call_add)
 ```
 
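-Note that behind the scenes, the translator goes down the call tree and swaps functions like `"__add__"` with the local translations.
+Note that the translator only puts the expression in call form. The codata visitor then goes down the call tree and swaps functions like `"__add__"` with the local translations.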
 
+```{python}
+from siuba.siu.visitors import CodataVisitor
+codata = CodataVisitor(WowSqlColumn, object)
+
+call_add_final = codata.visit(call_add)
+
+show_tree(call_add_final)
+```
+
 ```{python}
 # the root node is __add__. shown as +.
 _.x + _.y
 ```
 
 ```{python}
 # We can see this in action by calling the translation directly.
-scalar["__add__"](sel.columns.x, sel.columns.y)
+scalar["__add__"](WowSqlColumn(), sel.columns.x, sel.columns.y)
 ```
 
 By default the translate method assumes the expression is using window functions, so operations like `.mean()` return SqlAlchemy Over clauses.
 
 ```{python}
 f_translate = translator.translate(_.x.mean())
-expr = f_translate(sel.columns)
+
+f_translate_co = codata.visit(f_translate)
+expr = f_translate_co(sel.columns)
 
 expr
 ```
@@ -166,7 +188,7 @@ from siuba.siu import _, symbolic_dispatch
 from sqlalchemy import sql
 
 @symbolic_dispatch(cls = WowSqlColumn)
-def round(col):
+def round(self, col):
+    # Custom translation of `round`, registered for WowSqlColumn.
+    # `self` is presumably the column-data instance passed in first (compare
+    # `window["rank"](WowSqlColumn(), ...)` earlier in this document) and
+    # `col` the sqlalchemy clause to wrap -- TODO confirm.
     print("running round function")
     
-    return sql.function.round(col)
+    # sqlalchemy exposes its SQL function generator as `sql.func`;
+    # there is no `sql.function` attribute.
+    return sql.func.round(col)
@@ -224,8 +246,8 @@ tbl_cars
 Note that you can access a number of useful attributes.
 
 ```{python}
-# the underlying translator
-f_add = tbl_cars.translator.translate(_.mpg + _.hp)
+# calls the underlying translator and codata
+f_add = tbl_cars.shape_call(_.mpg + _.hp)
 f_add(tbl_cars.last_op.columns)
 ```
 
 
@@ -46,14 +46,16 @@
             "gapminder==0.1",
         ],
     },
-    python_requires=">=3.6",
+    python_requires=">=3.7",
     include_package_data=True,
     long_description=README,
     long_description_content_type="text/markdown",
     classifiers=[
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
     ],
 )
 
@@ -1,5 +1,5 @@
 # version ---------------------------------------------------------------------
-__version__ = "0.1.2"
+__version__ = "0.2.0.dev3"
 
 # default imports--------------------------------------------------------------
 from .siu import _, Lam
 
@@ -66,6 +66,13 @@ def install_pd_siu():
     DataFrameGroupBy.__repr__ = _repr_grouped_df_console_
 
 def _repr_grouped_df_html_(self):
+    # render the wrapped frame once; the result is reused in the return below
+    obj_repr = self.obj._repr_html_()
+    
+    # pandas can be configured not to produce an html representation, in which
+    # case obj_repr is None. Return None as well, so that ipython falls back
+    # to the plain repr.
+    if obj_repr is None:
+        return None
+
-    return "<div><p>(grouped data frame)</p>" + self.obj._repr_html_() + "</div>"
+    return "<div><p>(grouped data frame)</p>" + obj_repr + "</div>"
 
 def _repr_grouped_df_console_(self):
 
@@ -1,4 +1,4 @@
-from siuba.siu import CallTreeLocal, FunctionLookupError
+from siuba.siu import CallTreeLocal, FunctionLookupError, ExecutionValidatorVisitor
 from .groupby import SeriesGroupBy
 
 from .translate import (
@@ -99,6 +99,8 @@ def register_method(ns, op_name, f, is_property = False, accessor = None):
         call_props = ALL_PROPERTIES
         )
 
+call_validator = ExecutionValidatorVisitor(GroupByAgg, SeriesGroupBy)
+
 
 # Fast group by verbs =========================================================
 
@@ -123,6 +125,8 @@ def grouped_eval(__data, expr, require_agg = False):
     if isinstance(expr, Call):
         try:
             call = call_listener.enter(expr)
+            call_validator.visit(call)
+
         except FunctionLookupError as e:
             fallback_warning(expr, str(e))
             call = expr
@@ -162,6 +166,7 @@ def _transform_args(args):
         elif isinstance(expr, Call):
             try:
                 call = call_listener.enter(expr)
+                call_validator.visit(call)
                 out.append(call)
             except FunctionLookupError as e:
                 fallback_warning(expr, str(e))
 
@@ -122,7 +122,7 @@ def test_transform_args():
 def test_fast_grouped_custom_user_funcs():
     @symbolic_dispatch
     def f(x):
-        return x.mean()
+        raise NotImplementedError()
 
     @f.register(SeriesGroupBy)
     def _f_grouped(x) -> GroupByAgg:
@@ -149,7 +149,7 @@ def test_fast_grouped_custom_user_func_fail():
     def f(x):
         return x.mean()
 
-    @f.register(GroupByAgg)
+    @f.register(SeriesGroupBy)
     def _f_gser(x):
         # note, no return annotation, so translator will raise an error
         return GroupByAgg.from_result(x.mean(), x)
 
@@ -1,4 +1,13 @@
 from .generics import ALL_OPS, PLAIN_OPS
+from .utils import _register_series_default
+
+# attach the default series implementation to every generic in ALL_OPS
+for _generic in ALL_OPS.values():
+    _register_series_default(_generic)
+
+# keep the loop variable and the helper out of the package namespace
+del _generic, _register_series_default
+
 
 # import accessor generics. These are included in ALL_OPS, but since we want
 # users to be able to import from them, also need to be modules. Start their
 
@@ -8,7 +8,7 @@
 from siuba.siu import FunctionLookupBound
 from siuba.sql.utils import get_dialect_translator
 
-SQL_BACKENDS = ["postgresql", "redshift", "sqlite", "mysql", "bigquery"]
+SQL_BACKENDS = ["postgresql", "redshift", "sqlite", "mysql", "bigquery", "snowflake"]
 ALL_BACKENDS = SQL_BACKENDS + ["pandas"]
 
 methods = pd.DataFrame(
@@ -32,24 +32,57 @@ def read_dialect(name):
 
 
 def read_sql_op(name, backend, translator):
-    f_win = translator.window.local.get(name)
-    f_agg = translator.aggregate.local.get(name)
-
-    # check FunctionLookupBound, a sentinal class for not implemented funcs
-    support =  not (f_win is None or isinstance(f_win, FunctionLookupBound))
-    metadata = getattr(f_win, "operation", {})
+    """Describe how one backend supports the operation called `name`.
+
+    Looks `name` up in the translator's window and aggregate locals and
+    validates each dispatcher against the matching dispatch class.
+
+    Returns a dict with the keys "full_name", "backend" and "metadata"; the
+    metadata always holds "is_supported" and "flags" entries.
+    """
+    # TODO: MC-NOTE - cleanup this code
+    from siuba.siu.visitors import CodataVisitor, FunctionLookupError
+    from siuba.ops.utils import Operation
+
+    win_visitor = CodataVisitor(translator.window.dispatch_cls)
+    agg_visitor = CodataVisitor(translator.aggregate.dispatch_cls)
+
+    disp_win = translator.window.local[name]
+    disp_agg = translator.aggregate.local[name]
+
+    # a window op counts as supported only if validation yields something
+    # that is neither a FunctionLookupBound nor the function the dispatcher
+    # resolves for plain `object`
+    try:
+        f_win = win_visitor.validate_dispatcher(disp_win, strict=False)
+        win_supported = not (
+            isinstance(f_win, FunctionLookupBound)
+            or disp_win.dispatch(object) is f_win
+        )
+    except FunctionLookupError:
+        f_win, win_supported = None, False
+
+    try:
+        f_agg = agg_visitor.validate_dispatcher(disp_agg)
+        agg_supported = not isinstance(f_agg, FunctionLookupBound)
+    except FunctionLookupError:
+        agg_supported = False
 
     # window functions should be a superset of agg functions
-    if f_win is None and f_agg is not None:
+    if f_win is None and agg_supported:
         raise Exception("agg functions in %s without window funcs: %s" %(backend, name))
 
-    if support and isinstance(f_agg, FunctionLookupBound):
+    if win_supported and not agg_supported:
+        flags = "no_aggregate"
+    elif not win_supported and agg_supported:
         flags = "no_mutate"
     else:
         flags = ""
 
-    meta = {"is_supported": support, "flags": flags, **metadata}
+    # operation metadata is only attached when the op is supported somewhere
+    is_supported = win_supported or agg_supported
+    metadata = {}
+    if is_supported:
+        metadata = getattr(f_win, "operation", {})
+        if isinstance(metadata, Operation):
+            metadata = vars(metadata)
+    meta = {"is_supported": is_supported, "flags": flags, **metadata}
 
     return {"full_name": name, "backend": backend, "metadata": meta}