From b66c4efd8ffbdc3e62b09f8b890da353c9d71818 Mon Sep 17 00:00:00 2001 From: Gabor Gevay Date: Fri, 20 Dec 2024 12:29:06 +0100 Subject: [PATCH 1/5] Sometimes we can give the type to MRE::take_safely --- src/expr/src/relation.rs | 40 +++++++++++++++++++- src/transform/src/equivalence_propagation.rs | 5 ++- src/transform/src/fold_constants.rs | 4 +- src/transform/src/predicate_pushdown.rs | 4 +- 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/expr/src/relation.rs b/src/expr/src/relation.rs index 2c2035a7fd1dc..b48c1417195f2 100644 --- a/src/expr/src/relation.rs +++ b/src/expr/src/relation.rs @@ -25,6 +25,7 @@ use mz_ore::collections::CollectionExt; use mz_ore::id_gen::IdGen; use mz_ore::metrics::Histogram; use mz_ore::num::NonNeg; +use mz_ore::soft_assert_no_log; use mz_ore::stack::RecursionLimitError; use mz_ore::str::Indent; use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError}; @@ -1688,8 +1689,40 @@ impl MirRelationExpr { } /// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the correct type. + /// + /// This calls `.typ()` to determine scalar types. If you already know the scalar types type, + /// then use either + /// [MirRelationExpr::take_safely_with_col_types] or [MirRelationExpr::take_safely_with_rel_type]. pub fn take_safely(&mut self) -> MirRelationExpr { - let typ = self.typ(); + self.take_safely_with_rel_type(self.typ()) + } + + /// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the given scalar + /// types. Keys and nullability are ignored in the given `RelationType`, and instead we set the + /// best possible key and nullability, since we are making an empty collection. + /// + /// Compared to `take_safely()`, this just saves the cost of the `.typ()` call. + pub fn take_safely_with_col_types(&mut self, typ: Vec) -> MirRelationExpr { + self.take_safely_with_rel_type(RelationType::new(typ)) + } + + /// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the given scalar + /// types. The given scalar types should be `base_eq` with the types that `typ()` would find. + /// Keys and nullability are ignored in the given `RelationType`, and instead we set the best + /// possible key and nullability, since we are making an empty collection. + /// + /// Compared to `take_safely()`, this just saves the cost of the `.typ()` call. + pub fn take_safely_with_rel_type(&mut self, mut typ: RelationType) -> MirRelationExpr { + soft_assert_no_log!(self + .typ() + .column_types + .iter() + .zip_eq(typ.column_types.iter()) + .all(|(t1, t2)| t1.scalar_type.base_eq(&t2.scalar_type))); + typ.keys = vec![vec![]]; + for ct in typ.column_types.iter_mut() { + ct.nullable = false; + } std::mem::replace( self, MirRelationExpr::Constant { @@ -1698,6 +1731,7 @@ impl MirRelationExpr { }, ) } + /// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with an **incorrect** type. /// /// This should only be used if `self` is about to be dropped or otherwise overwritten. @@ -2233,11 +2267,13 @@ impl MirRelationExpr { value.visit_pre_mut(|e| { if let MirRelationExpr::Get { id: crate::Id::Local(id), + typ, .. } = e { + let typ = typ.clone(); if deadlist.contains(id) { - e.take_safely(); + e.take_safely_with_rel_type(typ); } } }); diff --git a/src/transform/src/equivalence_propagation.rs b/src/transform/src/equivalence_propagation.rs index cbf7c9a5c3bdc..ba3facce1bf07 100644 --- a/src/transform/src/equivalence_propagation.rs +++ b/src/transform/src/equivalence_propagation.rs @@ -124,6 +124,7 @@ impl EquivalencePropagation { let expr_type = derived .value::() .expect("RelationType required"); + assert!(expr_type.is_some()); let expr_equivalences = derived .value::() .expect("Equivalences required"); @@ -132,7 +133,7 @@ impl EquivalencePropagation { let expr_equivalences = if let Some(e) = expr_equivalences { e } else { - expr.take_safely(); + expr.take_safely_with_col_types(expr_type.clone().unwrap()); return; }; @@ -147,7 +148,7 @@ impl EquivalencePropagation { outer_equivalences.minimize(expr_type.as_ref().map(|x| &x[..])); if outer_equivalences.unsatisfiable() { - expr.take_safely(); + expr.take_safely_with_col_types(expr_type.clone().unwrap()); return; } diff --git a/src/transform/src/fold_constants.rs b/src/transform/src/fold_constants.rs index 68016f7720b7a..58a60850fadc6 100644 --- a/src/transform/src/fold_constants.rs +++ b/src/transform/src/fold_constants.rs @@ -250,7 +250,7 @@ impl FoldConstants { .iter() .any(|p| p.is_literal_false() || p.is_literal_null()) { - relation.take_safely(); + relation.take_safely_with_rel_type(relation_type.clone()); } else if let Some((rows, ..)) = (**input).as_const() { // Evaluate errors last, to reduce risk of spurious errors. predicates.sort_by_key(|p| p.is_literal_err()); @@ -291,7 +291,7 @@ impl FoldConstants { .. } => { if inputs.iter().any(|e| e.is_empty()) { - relation.take_safely(); + relation.take_safely_with_rel_type(relation_type.clone()); } else if let Some(e) = inputs.iter().find_map(|i| i.as_const_err()) { *relation = MirRelationExpr::Constant { rows: Err(e.clone()), diff --git a/src/transform/src/predicate_pushdown.rs b/src/transform/src/predicate_pushdown.rs index 95c7dfa188f3b..7def77b8d21df 100644 --- a/src/transform/src/predicate_pushdown.rs +++ b/src/transform/src/predicate_pushdown.rs @@ -595,7 +595,9 @@ impl PredicatePushdown { .count() > 1 { - relation.take_safely(); + relation.take_safely_with_rel_type( + relation.typ_with_input_types(&input_types), + ); return Ok(()); } From a5e58a60b622f008e0403f655a808f80ffb50e61 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 19 Dec 2024 12:09:52 -0500 Subject: [PATCH 2/5] does recalculating types matter? --- src/adapter/src/coord/peek.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/adapter/src/coord/peek.rs b/src/adapter/src/coord/peek.rs index 79cc344e4a2ea..18bc9ca084509 100644 --- a/src/adapter/src/coord/peek.rs +++ b/src/adapter/src/coord/peek.rs @@ -315,13 +315,15 @@ pub fn create_fast_path_plan( if dataflow_plan.objects_to_build.len() >= 1 && dataflow_plan.objects_to_build[0].id == view_id { let mut mir = &*dataflow_plan.objects_to_build[0].plan.as_inner_mut(); - if let Some((rows, ..)) = mir.as_const() { + if let Some((rows, found_typ)) = mir.as_const() { // In the case of a constant, we can return the result now. + let recalc_typ = mir.typ(); + assert_eq!(found_typ, &recalc_typ); return Ok(Some(FastPathPlan::Constant( rows.clone() .map(|rows| rows.into_iter().map(|(row, diff)| (row, diff)).collect()), // For best accuracy, we need to recalculate typ. - mir.typ(), + recalc_typ, ))); } else { // If there is a TopK that would be completely covered by the finishing, then jump From 10aa41df6387e55c870df865c54c98e6e384e58d Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 19 Dec 2024 13:08:06 -0500 Subject: [PATCH 3/5] trying with just `found_typ` --- src/adapter/src/coord/peek.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/adapter/src/coord/peek.rs b/src/adapter/src/coord/peek.rs index 18bc9ca084509..d58502acde075 100644 --- a/src/adapter/src/coord/peek.rs +++ b/src/adapter/src/coord/peek.rs @@ -317,13 +317,11 @@ pub fn create_fast_path_plan( let mut mir = &*dataflow_plan.objects_to_build[0].plan.as_inner_mut(); if let Some((rows, found_typ)) = mir.as_const() { // In the case of a constant, we can return the result now. - let recalc_typ = mir.typ(); - assert_eq!(found_typ, &recalc_typ); return Ok(Some(FastPathPlan::Constant( rows.clone() .map(|rows| rows.into_iter().map(|(row, diff)| (row, diff)).collect()), // For best accuracy, we need to recalculate typ. - recalc_typ, + found_typ.clone(), ))); } else { // If there is a TopK that would be completely covered by the finishing, then jump From eeae1a8d123980540ca19c32dcb5854a3b0370e2 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 19 Dec 2024 14:35:03 -0500 Subject: [PATCH 4/5] rewrite tests, just two small differences in keys --- src/environmentd/tests/testdata/http/ws | 2 +- test/sqllogictest/explain/plan_insights.slt | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/environmentd/tests/testdata/http/ws b/src/environmentd/tests/testdata/http/ws index 8f606774e1c5f..0f1bdd464e947 100644 --- a/src/environmentd/tests/testdata/http/ws +++ b/src/environmentd/tests/testdata/http/ws @@ -384,7 +384,7 @@ ws-text ws-text {"query": "SELECT 1"} ---- -{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish output=[#0]\\n Map (1)\\n Constant\\n - ()\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Map\": {\n \"input\": {\n \"Constant\": {\n \"rows\": [\n {\n \"data\": []\n }\n ],\n \"typ\": {\n \"column_types\": [],\n \"keys\": []\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t66:\\n Finish output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t65\\n\\nt65:\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t66\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 65\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t65\",\n \"plan\": {\n \"Constant\": {\n \"rows\": {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish output=[#0]\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"Constant\": [\n {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {},\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT ''\"\n}","code":"MZ001","severity":"notice"}} +{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish output=[#0]\\n Map (1)\\n Constant\\n - ()\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Map\": {\n \"input\": {\n \"Constant\": {\n \"rows\": [\n {\n \"data\": []\n }\n ],\n \"typ\": {\n \"column_types\": [],\n \"keys\": []\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t66:\\n Finish output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t65\\n\\nt65:\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t66\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 65\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t65\",\n \"plan\": {\n \"Constant\": {\n \"rows\": {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish output=[#0]\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"Constant\": [\n {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {},\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT ''\"\n}","code":"MZ001","severity":"notice"}} {"type":"CommandStarting","payload":{"has_rows":true,"is_streaming":false}} {"type":"Rows","payload":{"columns":[{"name":"?column?","type_oid":23,"type_len":4,"type_mod":-1}]}} {"type":"Row","payload":["1"]} diff --git a/test/sqllogictest/explain/plan_insights.slt b/test/sqllogictest/explain/plan_insights.slt index 8d7489578fa1b..c55483035dec8 100644 --- a/test/sqllogictest/explain/plan_insights.slt +++ b/test/sqllogictest/explain/plan_insights.slt @@ -1248,9 +1248,7 @@ EXPLAIN PLAN INSIGHTS AS JSON FOR SELECT 'abc' "nullable": false } ], - "keys": [ - [] - ] + "keys": [] } ] } From 27b77cd82a03b38e10d781264f51506d8b840680 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 23 Dec 2024 09:11:50 -0500 Subject: [PATCH 5/5] drop stale comment --- src/adapter/src/coord/peek.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/adapter/src/coord/peek.rs b/src/adapter/src/coord/peek.rs index d58502acde075..1eb0505373594 100644 --- a/src/adapter/src/coord/peek.rs +++ b/src/adapter/src/coord/peek.rs @@ -320,7 +320,6 @@ pub fn create_fast_path_plan( return Ok(Some(FastPathPlan::Constant( rows.clone() .map(|rows| rows.into_iter().map(|(row, diff)| (row, diff)).collect()), - // For best accuracy, we need to recalculate typ. found_typ.clone(), ))); } else {