From 409347d35e994e7fbd6d6bdeb40a87b805f284ff Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Mon, 12 Feb 2024 18:52:21 +0100 Subject: [PATCH 1/6] Set split_out=True --- tests/tpch/dask_queries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index 7c6659c82b..cc9a3dd6c5 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -887,7 +887,7 @@ def query_17(dataset_path, fs): avg_qnty_by_partkey = ( lineitem.groupby("l_partkey") - .l_quantity.mean() + .l_quantity.mean(split_out=True) .to_frame() .rename(columns={"l_quantity": "l_quantity_avg"}) ) @@ -950,7 +950,7 @@ def query_18(dataset_path, fs): orders, left_on="c_custkey", right_on="o_custkey", how="inner" ).merge(lineitem, left_on="o_orderkey", right_on="l_orderkey", how="inner") - qnt_over_300 = lineitem.groupby("l_orderkey").l_quantity.sum().to_frame() + qnt_over_300 = lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).to_frame() qnt_over_300 = qnt_over_300[qnt_over_300.l_quantity > 300].drop( columns=["l_quantity"] ) From f2ae34c86bb71bbecc747631e9ba2207c379054f Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Mon, 12 Feb 2024 18:53:28 +0100 Subject: [PATCH 2/6] Add FIXMEs --- tests/tpch/dask_queries.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index cc9a3dd6c5..74615a21d5 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -887,6 +887,7 @@ def query_17(dataset_path, fs): avg_qnty_by_partkey = ( lineitem.groupby("l_partkey") + # FIXME: https://github.com/dask-contrib/dask-expr/issues/867 .l_quantity.mean(split_out=True) .to_frame() .rename(columns={"l_quantity": "l_quantity_avg"}) @@ -950,6 +951,7 @@ def query_18(dataset_path, fs): orders, left_on="c_custkey", right_on="o_custkey", how="inner" ).merge(lineitem, left_on="o_orderkey", right_on="l_orderkey", how="inner") + # FIXME: https://github.com/dask-contrib/dask-expr/issues/867 qnt_over_300 = lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).to_frame() qnt_over_300 = qnt_over_300[qnt_over_300.l_quantity > 300].drop( columns=["l_quantity"] From 079c6af3291f54b5643a08f2eebc987c1fd9295e Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Tue, 13 Feb 2024 12:38:45 +0100 Subject: [PATCH 3/6] Q11 --- tests/tpch/dask_queries.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index 74615a21d5..b16f0a4428 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -576,7 +576,8 @@ def query_11(dataset_path, fs): joined["value"] = joined.ps_supplycost * joined.ps_availqty - res = joined.groupby("ps_partkey")["value"].sum() + # FIXME: https://github.com/dask-contrib/dask-expr/issues/867 + res = joined.groupby("ps_partkey")["value"].sum(split_out=True) res = ( res[res > threshold] .round(2) From a480eea89dc86d480fa9d83d24af62a265b2fa70 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Tue, 13 Feb 2024 12:43:59 +0100 Subject: [PATCH 4/6] pre-commit --- tests/tpch/dask_queries.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index b16f0a4428..92c332d61e 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -953,7 +953,9 @@ def query_18(dataset_path, fs): ).merge(lineitem, left_on="o_orderkey", right_on="l_orderkey", how="inner") # FIXME: https://github.com/dask-contrib/dask-expr/issues/867 - qnt_over_300 = lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).to_frame() + qnt_over_300 = ( + lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).to_frame() + ) qnt_over_300 = qnt_over_300[qnt_over_300.l_quantity > 300].drop( columns=["l_quantity"] ) From bbf457e3cac0e3595189aa9e0d05b867228b9307 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Tue, 13 Feb 2024 12:50:19 +0100 Subject: [PATCH 5/6] Q13 --- tests/tpch/dask_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index 92c332d61e..c1f95b8b19 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -677,7 +677,7 @@ def query_13(dataset_path, fs): ) subquery = ( subquery.groupby("c_custkey") - .o_orderkey.count() + .o_orderkey.count(split_out=True) .to_frame() .reset_index() .rename(columns={"o_orderkey": "c_count"})[["c_custkey", "c_count"]] From 9656b7d8126676996bffcafc119dd155d2f5e844 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Tue, 13 Feb 2024 12:57:33 +0100 Subject: [PATCH 6/6] FIXME --- tests/tpch/dask_queries.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py index c1f95b8b19..ca4cacac5d 100644 --- a/tests/tpch/dask_queries.py +++ b/tests/tpch/dask_queries.py @@ -677,6 +677,7 @@ def query_13(dataset_path, fs): ) subquery = ( subquery.groupby("c_custkey") + # FIXME: https://github.com/dask-contrib/dask-expr/issues/867 .o_orderkey.count(split_out=True) .to_frame() .reset_index()