Skip to content

Commit f09f195

Browse files
authored
feat(frontend): support two phase vnode based simple agg with approx_percentile (#18007)
1 parent 3cae1c3 commit f09f195

File tree

5 files changed

+217
-61
lines changed

5 files changed

+217
-61
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,80 @@
1+
# Single phase approx percentile
2+
statement ok
3+
create table t(p_col double, grp_col int);
4+
5+
statement ok
6+
insert into t select a, 1 from generate_series(-1000, 1000) t(a);
7+
8+
statement ok
9+
flush;
10+
11+
query I
12+
select
13+
percentile_cont(0.01) within group (order by p_col) as p01,
14+
min(p_col),
15+
percentile_cont(0.5) within group (order by p_col) as p50,
16+
count(*),
17+
percentile_cont(0.99) within group (order by p_col) as p99
18+
from t;
19+
----
20+
-980 -1000 0 2001 980
21+
22+
statement ok
23+
create materialized view m1 as
24+
select
25+
approx_percentile(0.01, 0.01) within group (order by p_col) as p01,
26+
min(p_col),
27+
approx_percentile(0.5, 0.01) within group (order by p_col) as p50,
28+
count(*),
29+
approx_percentile(0.99, 0.01) within group (order by p_col) as p99
30+
from t;
31+
32+
query I
33+
select * from m1;
34+
----
35+
-982.5779489474152 -1000 0 2001 982.5779489474152
36+
37+
# Test state encode / decode
38+
onlyif can-use-recover
39+
statement ok
40+
recover;
41+
42+
onlyif can-use-recover
43+
sleep 10s
44+
45+
query I
46+
select * from m1;
47+
----
48+
-982.5779489474152 -1000 0 2001 982.5779489474152
49+
50+
# Test 0<x<1 values
51+
statement ok
52+
insert into t select 0.001, 1 from generate_series(1, 500);
53+
54+
statement ok
55+
insert into t select 0.0001, 1 from generate_series(1, 501);
56+
57+
statement ok
58+
flush;
59+
60+
query I
61+
select * from m1;
62+
----
63+
-963.1209598593477 -1000 0.00009999833511933609 3002 963.1209598593477
64+
65+
query I
66+
select
67+
percentile_cont(0.01) within group (order by p_col) as p01,
68+
min(p_col),
69+
percentile_cont(0.5) within group (order by p_col) as p50,
70+
count(*),
71+
percentile_cont(0.99) within group (order by p_col) as p99
72+
from t;
73+
----
74+
-969.99 -1000 0.0001 3002 969.9899999999998
75+
76+
statement ok
77+
drop materialized view m1;
78+
79+
statement ok
80+
drop table t;

e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_normal_agg.slt → e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateless_agg.slt

-13
Original file line number | Diff line number | Diff line change
@@ -47,19 +47,6 @@ select * from m1;
4747
----
4848
-982.5779489474152 0 0 2001 982.5779489474152
4949

50-
# Test state encode / decode
51-
onlyif can-use-recover
52-
statement ok
53-
recover;
54-
55-
onlyif can-use-recover
56-
sleep 10s
57-
58-
query I
59-
select * from m1;
60-
----
61-
-982.5779489474152 0 0 2001 982.5779489474152
62-
6350
# Test 0<x<1 values
6451
statement ok
6552
insert into t select 0.001, 1 from generate_series(1, 500);

src/frontend/planner_test/tests/testdata/input/agg.yaml

+7
Original file line number | Diff line number | Diff line change
@@ -1053,6 +1053,13 @@
10531053
sql: |
10541054
CREATE TABLE t (v1 int, v2 int);
10551055
SELECT sum(v1) as s1, approx_percentile(0.2, 0.01) WITHIN GROUP (order by v1 desc) from t;
1056+
expected_outputs:
1057+
- logical_plan
1058+
- stream_plan
1059+
- name: test simple approx_percentile with different approx_percentile interleaved with stateless + stateful simple aggs
1060+
sql: |
1061+
CREATE TABLE t (v1 int, v2 int);
1062+
SELECT sum(v1) as s1, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v1) as x, count(*), max(v2) as m2, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v2) as y from t;
10561063
expected_outputs:
10571064
- logical_plan
10581065
- stream_plan

src/frontend/planner_test/tests/testdata/output/agg.yaml

+30
Original file line number | Diff line number | Diff line change
@@ -2040,3 +2040,33 @@
20402040
└─StreamShare { id: 2 }
20412041
└─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] }
20422042
└─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) }
2043+
- name: test simple approx_percentile with different approx_percentile interleaved with stateless + stateful simple aggs
2044+
sql: |
2045+
CREATE TABLE t (v1 int, v2 int);
2046+
SELECT sum(v1) as s1, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v1) as x, count(*), max(v2) as m2, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v2) as y from t;
2047+
logical_plan: |-
2048+
LogicalProject { exprs: [sum(t.v1), approx_percentile($expr1), count, max(t.v2), approx_percentile($expr2)] }
2049+
└─LogicalAgg { aggs: [sum(t.v1), approx_percentile($expr1), count, max(t.v2), approx_percentile($expr2)] }
2050+
└─LogicalProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2] }
2051+
└─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] }
2052+
stream_plan: |-
2053+
StreamMaterialize { columns: [s1, x, count, m2, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck }
2054+
└─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, max(max(t.v2)):Int32, approx_percentile:Float64] }
2055+
├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] }
2056+
│ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 }
2057+
│ │ └─StreamExchange { dist: Single }
2058+
│ │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 }
2059+
│ │ └─StreamShare { id: 2 }
2060+
│ │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] }
2061+
│ │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) }
2062+
│ └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 }
2063+
│ └─StreamExchange { dist: Single }
2064+
│ └─StreamLocalApproxPercentile { percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 }
2065+
│ └─StreamShare { id: 2 }
2066+
│ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] }
2067+
│ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) }
2068+
└─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), max(max(t.v2)), count] }
2069+
└─StreamExchange { dist: Single }
2070+
└─StreamHashAgg { group_key: [$expr5], aggs: [sum(t.v1), count, max(t.v2)] }
2071+
└─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr3, t.v2, t.v2::Float64 as $expr4, t._row_id, Vnode(t._row_id) as $expr5] }
2072+
└─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) }

src/frontend/src/optimizer/plan_node/logical_agg.rs

+100-48
Original file line number | Diff line number | Diff line change
@@ -76,38 +76,17 @@ impl LogicalAgg {
7676
let mut core = self.core.clone();
7777

7878
// ====== Handle approx percentile aggs
79-
let SeparatedAggInfo { normal, approx } = self.separate_normal_and_special_agg();
80-
81-
let AggInfo {
82-
calls: non_approx_percentile_agg_calls,
83-
col_mapping: non_approx_percentile_col_mapping,
84-
} = normal;
85-
let AggInfo {
86-
calls: approx_percentile_agg_calls,
87-
col_mapping: approx_percentile_col_mapping,
88-
} = approx;
89-
90-
let needs_row_merge = (!non_approx_percentile_agg_calls.is_empty()
91-
&& !approx_percentile_agg_calls.is_empty())
92-
|| approx_percentile_agg_calls.len() >= 2;
93-
core.input = if needs_row_merge {
94-
// If there's row merge, we need to share the input.
95-
StreamShare::new_from_input(stream_input.clone()).into()
96-
} else {
97-
stream_input
98-
};
99-
core.agg_calls = non_approx_percentile_agg_calls;
79+
let (non_approx_percentile_col_mapping, approx_percentile_col_mapping, approx_percentile) =
80+
self.prepare_approx_percentile(&mut core, stream_input.clone())?;
10081

101-
let approx_percentile =
102-
self.build_approx_percentile_aggs(core.input.clone(), &approx_percentile_agg_calls)?;
103-
104-
// ====== Handle normal aggs
10582
if core.agg_calls.is_empty() {
10683
if let Some(approx_percentile) = approx_percentile {
10784
return Ok(approx_percentile);
10885
};
10986
bail!("expected at least one agg call");
11087
}
88+
89+
// ====== Handle normal aggs
11190
let total_agg_calls = core
11291
.agg_calls
11392
.iter()
@@ -123,21 +102,12 @@ impl LogicalAgg {
123102
new_stream_simple_agg(Agg::new(total_agg_calls, IndexSet::empty(), exchange));
124103

125104
// ====== Merge approx percentile and normal aggs
126-
if let Some(approx_percentile) = approx_percentile {
127-
if needs_row_merge {
128-
let row_merge = StreamRowMerge::new(
129-
approx_percentile,
130-
global_agg.into(),
131-
approx_percentile_col_mapping,
132-
non_approx_percentile_col_mapping,
133-
)?;
134-
Ok(row_merge.into())
135-
} else {
136-
Ok(approx_percentile)
137-
}
138-
} else {
139-
Ok(global_agg.into())
140-
}
105+
Self::add_row_merge_if_needed(
106+
approx_percentile,
107+
global_agg.into(),
108+
approx_percentile_col_mapping,
109+
non_approx_percentile_col_mapping,
110+
)
141111
}
142112

143113
/// Generate plan for stateless/stateful 2-phase streaming agg.
@@ -148,10 +118,21 @@ impl LogicalAgg {
148118
stream_input: PlanRef,
149119
dist_key: &[usize],
150120
) -> Result<PlanRef> {
151-
let input_col_num = stream_input.schema().len();
121+
let mut core = self.core.clone();
122+
123+
let (non_approx_percentile_col_mapping, approx_percentile_col_mapping, approx_percentile) =
124+
self.prepare_approx_percentile(&mut core, stream_input.clone())?;
125+
126+
if core.agg_calls.is_empty() {
127+
if let Some(approx_percentile) = approx_percentile {
128+
return Ok(approx_percentile);
129+
};
130+
bail!("expected at least one agg call");
131+
}
152132

153133
// Generate vnode via project
154134
// TODO(kwannoel): We should apply Project optimization rules here.
135+
let input_col_num = stream_input.schema().len(); // get schema len before moving `stream_input`.
155136
let project = StreamProject::new(generic::Project::with_vnode_col(stream_input, dist_key));
156137
let vnode_col_idx = project.base.schema().len() - 1;
157138

@@ -160,7 +141,7 @@ impl LogicalAgg {
160141
local_group_key.insert(vnode_col_idx);
161142
let n_local_group_key = local_group_key.len();
162143
let local_agg = new_stream_hash_agg(
163-
Agg::new(self.agg_calls().to_vec(), local_group_key, project.into()),
144+
Agg::new(core.agg_calls.to_vec(), local_group_key, project.into()),
164145
Some(vnode_col_idx),
165146
);
166147
// Global group key excludes vnode.
@@ -173,11 +154,11 @@ impl LogicalAgg {
173154
.expect("some input group key could not be mapped");
174155

175156
// Generate global agg step
176-
if self.group_key().is_empty() {
157+
let global_agg = if self.group_key().is_empty() {
177158
let exchange =
178159
RequiredDist::single().enforce_if_not_satisfies(local_agg.into(), &Order::any())?;
179160
let global_agg = new_stream_simple_agg(Agg::new(
180-
self.agg_calls()
161+
core.agg_calls
181162
.iter()
182163
.enumerate()
183164
.map(|(partial_output_idx, agg_call)| {
@@ -187,15 +168,15 @@ impl LogicalAgg {
187168
global_group_key.into_iter().collect(),
188169
exchange,
189170
));
190-
Ok(global_agg.into())
171+
global_agg.into()
191172
} else {
192173
let exchange = RequiredDist::shard_by_key(input_col_num, &global_group_key)
193174
.enforce_if_not_satisfies(local_agg.into(), &Order::any())?;
194175
// Local phase should have reordered the group keys into their required order.
195176
// we can just follow it.
196177
let global_agg = new_stream_hash_agg(
197178
Agg::new(
198-
self.agg_calls()
179+
core.agg_calls
199180
.iter()
200181
.enumerate()
201182
.map(|(partial_output_idx, agg_call)| {
@@ -208,8 +189,14 @@ impl LogicalAgg {
208189
),
209190
None,
210191
);
211-
Ok(global_agg.into())
212-
}
192+
global_agg.into()
193+
};
194+
Self::add_row_merge_if_needed(
195+
approx_percentile,
196+
global_agg,
197+
approx_percentile_col_mapping,
198+
non_approx_percentile_col_mapping,
199+
)
213200
}
214201

215202
fn gen_single_plan(&self, stream_input: PlanRef) -> Result<PlanRef> {
@@ -304,6 +291,71 @@ impl LogicalAgg {
304291
}
305292
}
306293

294+
/// Prepares metadata and the `approx_percentile` plan, if there's one present.
295+
/// It may modify `core.agg_calls` to separate normal agg and approx percentile agg,
296+
/// and `core.input` to share the input via `StreamShare`,
297+
/// to both approx percentile agg and normal agg.
298+
fn prepare_approx_percentile(
299+
&self,
300+
core: &mut Agg<PlanRef>,
301+
stream_input: PlanRef,
302+
) -> Result<(ColIndexMapping, ColIndexMapping, Option<PlanRef>)> {
303+
let SeparatedAggInfo { normal, approx } = self.separate_normal_and_special_agg();
304+
305+
let AggInfo {
306+
calls: non_approx_percentile_agg_calls,
307+
col_mapping: non_approx_percentile_col_mapping,
308+
} = normal;
309+
let AggInfo {
310+
calls: approx_percentile_agg_calls,
311+
col_mapping: approx_percentile_col_mapping,
312+
} = approx;
313+
if !self.group_key().is_empty() && !approx_percentile_agg_calls.is_empty() {
314+
bail_not_implemented!("two-phase approx percentile agg with group key, please use single phase agg for approx_percentile with group key");
315+
}
316+
317+
// Either we have approx percentile aggs and non_approx percentile aggs,
318+
// or we have at least 2 approx percentile aggs.
319+
let needs_row_merge = (!non_approx_percentile_agg_calls.is_empty()
320+
&& !approx_percentile_agg_calls.is_empty())
321+
|| approx_percentile_agg_calls.len() >= 2;
322+
core.input = if needs_row_merge {
323+
// If there's row merge, we need to share the input.
324+
StreamShare::new_from_input(stream_input.clone()).into()
325+
} else {
326+
stream_input
327+
};
328+
core.agg_calls = non_approx_percentile_agg_calls;
329+
330+
let approx_percentile =
331+
self.build_approx_percentile_aggs(core.input.clone(), &approx_percentile_agg_calls)?;
332+
Ok((
333+
non_approx_percentile_col_mapping,
334+
approx_percentile_col_mapping,
335+
approx_percentile,
336+
))
337+
}
338+
339+
/// Add `RowMerge` if needed
340+
fn add_row_merge_if_needed(
341+
approx_percentile: Option<PlanRef>,
342+
global_agg: PlanRef,
343+
approx_percentile_col_mapping: ColIndexMapping,
344+
non_approx_percentile_col_mapping: ColIndexMapping,
345+
) -> Result<PlanRef> {
346+
if let Some(approx_percentile) = approx_percentile {
347+
let row_merge = StreamRowMerge::new(
348+
approx_percentile,
349+
global_agg,
350+
approx_percentile_col_mapping,
351+
non_approx_percentile_col_mapping,
352+
)?;
353+
Ok(row_merge.into())
354+
} else {
355+
Ok(global_agg)
356+
}
357+
}
358+
307359
fn separate_normal_and_special_agg(&self) -> SeparatedAggInfo {
308360
let estimated_len = self.agg_calls().len() - 1;
309361
let mut approx_percentile_agg_calls = Vec::with_capacity(estimated_len);

0 commit comments

Comments
 (0)