Skip to content

Commit 506d2b7

Browse files
authored
adapter: Implement expression cache (#30122)
This commit adds a module for an expression cache. It adds the functionality to durably cache optimized expressions. It uses the durable_cache module in its implementation. The expression cache runs on its own task, so that callers can insert new entries without blocking on the insert completing. The adapter doesn't use the expression cache yet; that is saved for a later commit. Works towards resolving #MaterializeInc/database-issues/issues/8384
1 parent 1945b14 commit 506d2b7

File tree

15 files changed

+733
-32
lines changed

15 files changed

+733
-32
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

doc/developer/design/20241008_expression_cache.md

+6-16
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,7 @@ struct Expressions {
7070
physical_plan: DataflowDescription<mz_compute_types::plan::Plan>,
7171
dataflow_metainfos: DataflowMetainfo<Arc<OptimizerNotice>>,
7272
notices: SmallVec<[Arc<OptimizerNotice>; 4]>,
73-
optimizer_feature_overrides: OptimizerFeatures,
74-
}
75-
76-
struct NewEntry {
77-
/// `GlobalId` of the new expression.
78-
id: GlobalId,
79-
/// New `Expressions` to cache.
80-
expressions: Expressions,
81-
/// `GlobalId`s to invalidate as a result of the new entry.
82-
invalidate_ids: BTreeSet<GlobalId>,
73+
optimizer_feature: OptimizerFeatures,
8374
}
8475

8576
struct ExpressionCache {
@@ -100,13 +91,12 @@ impl ExpressionCache {
10091
/// Returns all cached expressions in the current deploy generation, after reconciliation.
10192
fn open(&mut self, current_ids: &BTreeSet<GlobalId>, optimizer_features: &OptimizerFeatures, remove_prior_gens: bool) -> Vec<(GlobalId, Expressions)>;
10293

103-
/// Durably inserts `expressions` into current deploy generation. This may also invalidate
104-
/// entries giving by `expressions`.
105-
///
106-
/// Returns a [`Future`] that completes once the changes have been made durable.
94+
/// Durably removes all entries given by `invalidate_ids` and inserts `new_entries` into
95+
/// the current deploy generation.
10796
///
108-
/// Panics if any `GlobalId` already exists in the cache.
109-
fn insert_expressions(&mut self, expressions: Vec<NewEntry>) -> impl Future<Output=()>;
97+
/// If there is a duplicate ID in both `invalidate_ids` and `new_entries`, then the final value
98+
/// will be taken from `new_entries`.
99+
fn insert_expressions(&mut self, new_entries: Vec<(GlobalId, Expressions)>, invalidate_ids: BTreeSet<GlobalId>);
110100

111101
/// Durably remove and return all entries in the current deploy generation that depend on an ID in
112102
/// `dropped_ids`.

src/catalog/BUILD.bazel

+18
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ rust_library(
3636
"//src/build-info:mz_build_info",
3737
"//src/cloud-resources:mz_cloud_resources",
3838
"//src/compute-client:mz_compute_client",
39+
"//src/compute-types:mz_compute_types",
3940
"//src/controller:mz_controller",
4041
"//src/controller-types:mz_controller_types",
42+
"//src/durable-cache:mz_durable_cache",
4143
"//src/expr:mz_expr",
4244
"//src/orchestrator:mz_orchestrator",
4345
"//src/ore:mz_ore",
@@ -53,6 +55,7 @@ rust_library(
5355
"//src/storage-client:mz_storage_client",
5456
"//src/storage-controller:mz_storage_controller",
5557
"//src/storage-types:mz_storage_types",
58+
"//src/transform:mz_transform",
5659
] + all_crate_deps(normal = True),
5760
)
5861

@@ -87,8 +90,10 @@ rust_test(
8790
"//src/build-tools:mz_build_tools",
8891
"//src/cloud-resources:mz_cloud_resources",
8992
"//src/compute-client:mz_compute_client",
93+
"//src/compute-types:mz_compute_types",
9094
"//src/controller:mz_controller",
9195
"//src/controller-types:mz_controller_types",
96+
"//src/durable-cache:mz_durable_cache",
9297
"//src/expr:mz_expr",
9398
"//src/orchestrator:mz_orchestrator",
9499
"//src/ore:mz_ore",
@@ -105,6 +110,7 @@ rust_test(
105110
"//src/storage-client:mz_storage_client",
106111
"//src/storage-controller:mz_storage_controller",
107112
"//src/storage-types:mz_storage_types",
113+
"//src/transform:mz_transform",
108114
] + all_crate_deps(
109115
normal = True,
110116
normal_dev = True,
@@ -121,8 +127,10 @@ rust_doc_test(
121127
"//src/build-tools:mz_build_tools",
122128
"//src/cloud-resources:mz_cloud_resources",
123129
"//src/compute-client:mz_compute_client",
130+
"//src/compute-types:mz_compute_types",
124131
"//src/controller:mz_controller",
125132
"//src/controller-types:mz_controller_types",
133+
"//src/durable-cache:mz_durable_cache",
126134
"//src/expr:mz_expr",
127135
"//src/orchestrator:mz_orchestrator",
128136
"//src/ore:mz_ore",
@@ -139,6 +147,7 @@ rust_doc_test(
139147
"//src/storage-client:mz_storage_client",
140148
"//src/storage-controller:mz_storage_controller",
141149
"//src/storage-types:mz_storage_types",
150+
"//src/transform:mz_transform",
142151
] + all_crate_deps(
143152
normal = True,
144153
normal_dev = True,
@@ -193,8 +202,10 @@ rust_test(
193202
"//src/build-tools:mz_build_tools",
194203
"//src/cloud-resources:mz_cloud_resources",
195204
"//src/compute-client:mz_compute_client",
205+
"//src/compute-types:mz_compute_types",
196206
"//src/controller:mz_controller",
197207
"//src/controller-types:mz_controller_types",
208+
"//src/durable-cache:mz_durable_cache",
198209
"//src/expr:mz_expr",
199210
"//src/orchestrator:mz_orchestrator",
200211
"//src/ore:mz_ore",
@@ -211,6 +222,7 @@ rust_test(
211222
"//src/storage-client:mz_storage_client",
212223
"//src/storage-controller:mz_storage_controller",
213224
"//src/storage-types:mz_storage_types",
225+
"//src/transform:mz_transform",
214226
] + all_crate_deps(
215227
normal = True,
216228
normal_dev = True,
@@ -247,8 +259,10 @@ rust_test(
247259
"//src/build-tools:mz_build_tools",
248260
"//src/cloud-resources:mz_cloud_resources",
249261
"//src/compute-client:mz_compute_client",
262+
"//src/compute-types:mz_compute_types",
250263
"//src/controller:mz_controller",
251264
"//src/controller-types:mz_controller_types",
265+
"//src/durable-cache:mz_durable_cache",
252266
"//src/expr:mz_expr",
253267
"//src/orchestrator:mz_orchestrator",
254268
"//src/ore:mz_ore",
@@ -265,6 +279,7 @@ rust_test(
265279
"//src/storage-client:mz_storage_client",
266280
"//src/storage-controller:mz_storage_controller",
267281
"//src/storage-types:mz_storage_types",
282+
"//src/transform:mz_transform",
268283
] + all_crate_deps(
269284
normal = True,
270285
normal_dev = True,
@@ -301,8 +316,10 @@ rust_test(
301316
"//src/build-tools:mz_build_tools",
302317
"//src/cloud-resources:mz_cloud_resources",
303318
"//src/compute-client:mz_compute_client",
319+
"//src/compute-types:mz_compute_types",
304320
"//src/controller:mz_controller",
305321
"//src/controller-types:mz_controller_types",
322+
"//src/durable-cache:mz_durable_cache",
306323
"//src/expr:mz_expr",
307324
"//src/orchestrator:mz_orchestrator",
308325
"//src/ore:mz_ore",
@@ -319,6 +336,7 @@ rust_test(
319336
"//src/storage-client:mz_storage_client",
320337
"//src/storage-controller:mz_storage_controller",
321338
"//src/storage-types:mz_storage_types",
339+
"//src/transform:mz_transform",
322340
] + all_crate_deps(
323341
normal = True,
324342
normal_dev = True,

src/catalog/Cargo.toml

+6-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ mz-audit-log = { path = "../audit-log" }
2727
mz-build-info = { path = "../build-info" }
2828
mz-cloud-resources = { path = "../cloud-resources" }
2929
mz-compute-client = { path = "../compute-client" }
30+
mz-compute-types = { path = "../compute-types" }
3031
mz-controller = { path = "../controller" }
3132
mz-controller-types = { path = "../controller-types" }
33+
mz-durable-cache = { path = "../durable-cache" }
3234
mz-expr = { path = "../expr" }
3335
mz-orchestrator = { path = "../orchestrator" }
3436
mz-ore = { path = "../ore", features = ["chrono", "async", "tracing_"] }
@@ -44,6 +46,7 @@ mz-sql-parser = { path = "../sql-parser" }
4446
mz-storage-client = { path = "../storage-client" }
4547
mz-storage-controller = { path = "../storage-controller" }
4648
mz-storage-types = { path = "../storage-types" }
49+
mz-transform = { path = "../transform" }
4750
paste = "1.0.11"
4851
prometheus = { version = "0.13.3", default-features = false }
4952
proptest = { version = "1.0.0", default-features = false, features = ["std"] }
@@ -55,11 +58,13 @@ semver = { version = "1.0.16" }
5558
serde = "1.0.152"
5659
serde_json = "1.0.125"
5760
serde_plain = "1.0.1"
61+
smallvec = { version = "1.10.0", features = ["union"] }
5862
static_assertions = "1.1"
5963
sha2 = "0.10.6"
64+
thiserror = "1.0.37"
6065
timely = "0.12.0"
66+
tokio = { version = "1.38.0" }
6167
tracing = "0.1.37"
62-
thiserror = "1.0.37"
6368
uuid = "1.2.2"
6469
workspace-hack = { version = "0.0.0", path = "../workspace-hack" }
6570

@@ -69,7 +74,6 @@ insta = "1.32"
6974
mz-build-tools = { path = "../build-tools", default-features = false }
7075
mz-postgres-util = { path = "../postgres-util" }
7176
similar-asserts = "1.4"
72-
tokio = { version = "1.38.0" }
7377
tokio-postgres = { version = "0.7.8" }
7478

7579
[build-dependencies]

src/catalog/src/durable.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub use crate::durable::objects::{
3636
Role, Schema, SourceReference, SourceReferences, StorageCollectionMetadata,
3737
SystemConfiguration, SystemObjectDescription, SystemObjectMapping, UnfinalizedShard,
3838
};
39-
pub use crate::durable::persist::builtin_migration_shard_id;
39+
pub use crate::durable::persist::{builtin_migration_shard_id, expression_cache_shard_id};
4040
use crate::durable::persist::{Timestamp, UnopenedPersistCatalogState};
4141
pub use crate::durable::transaction::Transaction;
4242
use crate::durable::transaction::TransactionBatch;

src/catalog/src/durable/persist.rs

+8
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ const CATALOG_SEED: usize = 1;
117117
const UPGRADE_SEED: usize = 2;
118118
/// Seed used to generate the persist shard ID for builtin table migrations.
119119
const BUILTIN_MIGRATION_SEED: usize = 3;
120+
/// Seed used to generate the persist shard ID for the expression cache.
121+
const EXPRESSION_CACHE_SEED: usize = 4;
120122

121123
/// Durable catalog mode that dictates the effect of mutable operations.
122124
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@@ -1701,6 +1703,12 @@ pub fn builtin_migration_shard_id(organization_id: Uuid) -> ShardId {
17011703
shard_id(organization_id, BUILTIN_MIGRATION_SEED)
17021704
}
17031705

1706+
/// Deterministically generate an expression cache shard ID for the given
1707+
/// `organization_id`.
1708+
pub fn expression_cache_shard_id(organization_id: Uuid) -> ShardId {
1709+
shard_id(organization_id, EXPRESSION_CACHE_SEED)
1710+
}
1711+
17041712
/// Deterministically generate a shard ID for the given `organization_id` and `seed`.
17051713
fn shard_id(organization_id: Uuid, seed: usize) -> ShardId {
17061714
let hash = sha2::Sha256::digest(format!("{organization_id}{seed}")).to_vec();

0 commit comments

Comments
 (0)