From 8d9313c2ee22c9e788961c35cf1109a85ce3c395 Mon Sep 17 00:00:00 2001 From: montanarograziano Date: Fri, 30 Aug 2024 22:15:41 +0200 Subject: [PATCH] Add query11 implementation --- execute/q11.py | 38 ++++++++++++++++++++++++++++++++++++++ queries/q11.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 execute/q11.py create mode 100644 queries/q11.py diff --git a/execute/q11.py b/execute/q11.py new file mode 100644 index 0000000..1c70faf --- /dev/null +++ b/execute/q11.py @@ -0,0 +1,38 @@ +from pathlib import Path + +import pandas as pd +import polars as pl + +from queries import q11 + +pd.options.mode.copy_on_write = True +pd.options.future.infer_string = True + +nation = Path("data") / "nation.parquet" +partsupp = Path("data") / "partsupp.parquet" +supplier = Path("data") / "supplier.parquet" + +IO_FUNCS = { + "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), + "pandas[pyarrow]": lambda x: pd.read_parquet( + x, engine="pyarrow", dtype_backend="pyarrow" + ), + "polars[eager]": lambda x: pl.read_parquet(x), + "polars[lazy]": lambda x: pl.scan_parquet(x), +} + +tool = "pandas" +fn = IO_FUNCS[tool] +print(q11.query(fn(nation), fn(partsupp), fn(supplier))) + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q11.query(fn(nation), fn(partsupp), fn(supplier))) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print(q11.query(fn(nation), fn(partsupp), fn(supplier))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q11.query(fn(nation), fn(partsupp), fn(supplier)).collect()) diff --git a/queries/q11.py b/queries/q11.py new file mode 100644 index 0000000..a1249ee --- /dev/null +++ b/queries/q11.py @@ -0,0 +1,45 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + nation_ds_raw: FrameT, + partsupp_ds_raw: FrameT, + supplier_ds_raw: FrameT, +) -> FrameT: + var1 = datetime(1993, 10, 1) + var2 = datetime(1994, 1, 1) + + nation_ds = nw.from_native(nation_ds_raw) + partsupp_ds = nw.from_native(partsupp_ds_raw) + supplier_ds = nw.from_native(supplier_ds_raw) + + var1 = "GERMANY" + var2 = 0.0001 + + q1 = ( + partsupp_ds.join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey") + .join(nation_ds, left_on="s_nationkey", right_on="n_nationkey") + .filter(nw.col("n_name") == var1) + ) + q2 = q1.select( + (nw.col("ps_supplycost") * nw.col("ps_availqty")).sum().round(2).alias("tmp") + * var2 + ) + + q_final = ( + q1.with_columns( + (nw.col("ps_supplycost") * nw.col("ps_availqty")).alias("value") + ) + .group_by("ps_partkey") + .agg(nw.sum("value")) + .join(q2, how="cross") + .filter(nw.col("value") > nw.col("tmp")) + .select("ps_partkey", "value") + .sort("value", descending=True) + ) + + return nw.to_native(q_final)