Skip to content

Commit 74431e7

Browse files
committed
Fixup: refactor / cleanup
1 parent 1717751 commit 74431e7

File tree

1 file changed

+27
-57
lines changed

1 file changed

+27
-57
lines changed

tests/tpch/test_dask.py

+27 -57
Original file line number | Diff line number | Diff line change
@@ -304,83 +304,54 @@ def test_query_8(client, dataset_path, fs):
304304
var1 = datetime.strptime("1995-01-01", "%Y-%m-%d")
305305
var2 = datetime.strptime("1997-01-01", "%Y-%m-%d")
306306

307-
supplier_ds = dd.read_parquet(dataset_path + "supplier", filesystem=fs)
308-
lineitem_ds = dd.read_parquet(dataset_path + "lineitem", filesystem=fs)
309-
orders_ds = dd.read_parquet(dataset_path + "orders", filesystem=fs)
310-
customer_ds = dd.read_parquet(dataset_path + "customer", filesystem=fs)
311-
nation_ds = dd.read_parquet(dataset_path + "nation", filesystem=fs)
312-
region_ds = dd.read_parquet(dataset_path + "region", filesystem=fs)
313-
part_ds = dd.read_parquet(dataset_path + "part", filesystem=fs)
307+
supplier = dd.read_parquet(dataset_path + "supplier", filesystem=fs)
308+
lineitem = dd.read_parquet(dataset_path + "lineitem", filesystem=fs)
309+
orders = dd.read_parquet(dataset_path + "orders", filesystem=fs)
310+
customer = dd.read_parquet(dataset_path + "customer", filesystem=fs)
311+
nation = dd.read_parquet(dataset_path + "nation", filesystem=fs)
312+
region = dd.read_parquet(dataset_path + "region", filesystem=fs)
313+
part = dd.read_parquet(dataset_path + "part", filesystem=fs)
314314

315-
part_filtered = part_ds[part_ds["p_type"] == "ECONOMY ANODIZED STEEL"][
316-
["p_partkey"]
317-
]
315+
part = part[part["p_type"] == "ECONOMY ANODIZED STEEL"][["p_partkey"]]
318316

319-
lineitem_filtered = lineitem_ds[["l_partkey", "l_suppkey", "l_orderkey"]]
320-
lineitem_filtered["volume"] = lineitem_ds["l_extendedprice"] * (
321-
1.0 - lineitem_ds["l_discount"]
322-
)
323-
total = part_filtered.merge(
324-
lineitem_filtered,
325-
left_on="p_partkey",
326-
right_on="l_partkey",
327-
how="inner",
328-
)[["l_suppkey", "l_orderkey", "volume"]]
317+
lineitem["volume"] = lineitem["l_extendedprice"] * (1.0 - lineitem["l_discount"])
318+
total = part.merge(lineitem, left_on="p_partkey", right_on="l_partkey", how="inner")
329319

330-
supplier_filtered = supplier_ds[["s_suppkey", "s_nationkey"]]
331320
total = total.merge(
332-
supplier_filtered,
333-
left_on="l_suppkey",
334-
right_on="s_suppkey",
335-
how="inner",
336-
)[["l_orderkey", "volume", "s_nationkey"]]
321+
supplier, left_on="l_suppkey", right_on="s_suppkey", how="inner"
322+
)
337323

338-
orders_filtered = orders_ds[
339-
(orders_ds["o_orderdate"] >= var1) & (orders_ds["o_orderdate"] < var2)
340-
]
324+
orders = orders[(orders["o_orderdate"] >= var1) & (orders["o_orderdate"] < var2)]
341325

342-
orders_filtered["o_year"] = orders_filtered["o_orderdate"].dt.year
343-
orders_filtered = orders_filtered[["o_orderkey", "o_custkey", "o_year"]]
326+
orders["o_year"] = orders["o_orderdate"].dt.year
344327
total = total.merge(
345-
orders_filtered,
346-
left_on="l_orderkey",
347-
right_on="o_orderkey",
348-
how="inner",
349-
)[["volume", "s_nationkey", "o_custkey", "o_year"]]
328+
orders, left_on="l_orderkey", right_on="o_orderkey", how="inner"
329+
)
350330

351-
customer_filtered = customer_ds[["c_custkey", "c_nationkey"]]
352331
total = total.merge(
353-
customer_filtered,
354-
left_on="o_custkey",
355-
right_on="c_custkey",
356-
how="inner",
357-
)[["volume", "s_nationkey", "o_year", "c_nationkey"]]
358-
359-
n1_filtered = nation_ds[["n_nationkey", "n_regionkey"]]
360-
n2_filtered = nation_ds[["n_nationkey", "n_name"]].rename(
361-
columns={"n_name": "nation"}
332+
customer, left_on="o_custkey", right_on="c_custkey", how="inner"
362333
)
334+
335+
n1_filtered = nation[["n_nationkey", "n_regionkey"]]
336+
n2_filtered = nation[["n_nationkey", "n_name"]].rename(columns={"n_name": "nation"})
363337
total = total.merge(
364338
n1_filtered,
365339
left_on="c_nationkey",
366340
right_on="n_nationkey",
367341
how="inner",
368-
)[["volume", "s_nationkey", "o_year", "n_regionkey"]]
342+
)
369343

370344
total = total.merge(
371345
n2_filtered,
372346
left_on="s_nationkey",
373347
right_on="n_nationkey",
374348
how="inner",
375-
)[["volume", "o_year", "n_regionkey", "nation"]]
349+
)
376350

377-
region_filtered = region_ds[region_ds["r_name"] == "AMERICA"][["r_regionkey"]]
351+
region_filtered = region[region["r_name"] == "AMERICA"][["r_regionkey"]]
378352
total = total.merge(
379-
region_filtered,
380-
left_on="n_regionkey",
381-
right_on="r_regionkey",
382-
how="inner",
383-
)[["volume", "o_year", "nation"]]
353+
region_filtered, left_on="n_regionkey", right_on="r_regionkey", how="inner"
354+
)
384355

385356
mkt_brazil = (
386357
total[total["nation"] == "BRAZIL"].groupby("o_year").volume.sum().reset_index()
@@ -390,6 +361,5 @@ def test_query_8(client, dataset_path, fs):
390361
mkt_brazil, left_on="o_year", right_on="o_year", suffixes=("_mkt", "_brazil")
391362
)
392363
final["mkt_share"] = final.volume_brazil / final.volume_mkt
393-
total = total.sort_values(by=["o_year"], ascending=[True])
394-
395-
total.compute()
364+
final = final.sort_values(by=["o_year"], ascending=[True])
365+
final.compute()

0 commit comments

Comments
 (0)