diff --git a/Cargo.lock b/Cargo.lock index 23e2e0a6550a8..2ec747e3f7227 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9320,9 +9320,9 @@ dependencies = [ [[package]] name = "kvdb-rocksdb" -version = "0.20.1" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b089b6062662d720a836f055931434439fcd3a90f0059db0b831a99da6db460" +checksum = "739ac938a308a9a8b6772fd1d840fd9c0078f9c74fe294feaf32faae727102cc" dependencies = [ "kvdb", "num_cpus", @@ -23404,6 +23404,7 @@ name = "sp-database" version = "10.0.0" dependencies = [ "kvdb", + "kvdb-rocksdb", "parking_lot 0.12.3", ] diff --git a/Cargo.toml b/Cargo.toml index 1507bab180ba0..2672bce99a26d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -895,7 +895,7 @@ k256 = { version = "0.13.4", default-features = false } kitchensink-runtime = { path = "substrate/bin/node/runtime" } kvdb = { version = "0.13.0" } kvdb-memorydb = { version = "0.13.0" } -kvdb-rocksdb = { version = "0.20.1" } +kvdb-rocksdb = { version = "0.21.0" } kvdb-shared-tests = { version = "0.11.0" } landlock = { version = "0.3.0" } libc = { version = "0.2.155" } diff --git a/prdoc/pr_10495.prdoc b/prdoc/pr_10495.prdoc new file mode 100644 index 0000000000000..5eb3d2a0e6d3b --- /dev/null +++ b/prdoc/pr_10495.prdoc @@ -0,0 +1,14 @@ +title: 'RocksDB: Force compact columns only after warp sync' +doc: +- audience: Node Operator + description: |- + Recently we introduced a change that always force compacted a RocksDB database when starting a node and after writing a lot of data. We found out that force compacting a huge RocksDB database of more than 600GB takes quite some time (more than one hour), and it had to do this every time. + + So, this pull request changes the compaction to only happen after warp sync (and genesis), when we reset the state column to some given state. This way we no longer run it on startup of the node, which should fix the problems we have seen with archive nodes. 
+crates: +- name: sc-client-db + bump: patch + validate: false +- name: sp-database + bump: patch + validate: false diff --git a/substrate/client/db/Cargo.toml b/substrate/client/db/Cargo.toml index 4c7296032f2b9..2a8e2a78e11a1 100644 --- a/substrate/client/db/Cargo.toml +++ b/substrate/client/db/Cargo.toml @@ -27,7 +27,7 @@ codec = { features = ["derive"], workspace = true, default-features = true } hash-db = { workspace = true, default-features = true } kvdb = { workspace = true } kvdb-memorydb = { workspace = true } -kvdb-rocksdb = { optional = true, workspace = true } +kvdb-rocksdb = { optional = true, workspace = true, features = ["jemalloc"] } linked-hash-map = { workspace = true } log = { workspace = true, default-features = true } parity-db = { workspace = true } @@ -62,4 +62,4 @@ runtime-benchmarks = [ "kitchensink-runtime/runtime-benchmarks", "sp-runtime/runtime-benchmarks", ] -rocksdb = ["kvdb-rocksdb"] +rocksdb = ["kvdb-rocksdb", "sp-database/rocksdb"] diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index 7731d54d407ae..d86b722963b5b 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -839,6 +839,7 @@ pub struct BlockImportOperation { set_head: Option, commit_state: bool, create_gap: bool, + reset_storage: bool, index_ops: Vec, } @@ -934,6 +935,7 @@ impl sc_client_api::backend::BlockImportOperation ) -> ClientResult { let root = self.apply_new_state(storage, state_version)?; self.commit_state = true; + self.reset_storage = true; Ok(root) } @@ -1841,6 +1843,14 @@ impl Backend { self.storage.db.commit(transaction)?; + // `reset_storage == true` means the entire state got replaced. + // In this case we optimize the `STATE` column to improve read performance. + if operation.reset_storage { + if let Err(e) = self.storage.db.optimize_db_col(columns::STATE) { + warn!(target: "db", "Failed to optimize database after state import: {e:?}"); + } + } + // Apply all in-memory state changes. 
// Code beyond this point can't fail. @@ -2152,6 +2162,7 @@ impl sc_client_api::backend::Backend for Backend { set_head: None, commit_state: false, create_gap: true, + reset_storage: false, index_ops: Default::default(), }) } diff --git a/substrate/client/db/src/utils.rs b/substrate/client/db/src/utils.rs index a79f5ab3ac7d9..b00e6e2da4593 100644 --- a/substrate/client/db/src/utils.rs +++ b/substrate/client/db/src/utils.rs @@ -349,7 +349,7 @@ fn open_kvdb_rocksdb( let db = kvdb_rocksdb::Database::open(&db_config, path)?; // write database version only after the database is successfully opened crate::upgrade::update_version(path)?; - Ok(sp_database::as_database(db)) + Ok(sp_database::as_rocksdb_database(db)) } #[cfg(not(any(feature = "rocksdb", test)))] diff --git a/substrate/primitives/database/Cargo.toml b/substrate/primitives/database/Cargo.toml index 1795fece602ea..dfcef4c858254 100644 --- a/substrate/primitives/database/Cargo.toml +++ b/substrate/primitives/database/Cargo.toml @@ -15,4 +15,9 @@ workspace = true [dependencies] kvdb = { workspace = true } +kvdb-rocksdb = { optional = true, workspace = true } parking_lot = { workspace = true, default-features = true } + +[features] +default = [] +rocksdb = ["kvdb-rocksdb"] diff --git a/substrate/primitives/database/src/kvdb.rs b/substrate/primitives/database/src/kvdb.rs index 735813c368570..e3a3535f2c2d2 100644 --- a/substrate/primitives/database/src/kvdb.rs +++ b/substrate/primitives/database/src/kvdb.rs @@ -31,7 +31,75 @@ fn handle_err(result: std::io::Result) -> T { } } -/// Wrap RocksDb database into a trait object that implements `sp_database::Database` +/// Read the reference counter for a key. +fn read_counter( + db: &dyn KeyValueDB, + col: ColumnId, + key: &[u8], +) -> error::Result<(Vec, Option)> { + let mut counter_key = key.to_vec(); + counter_key.push(0); + Ok(match db.get(col, &counter_key).map_err(|e| error::DatabaseError(Box::new(e)))? 
{ + Some(data) => { + let mut counter_data = [0; 4]; + if data.len() != 4 { + return Err(error::DatabaseError(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Unexpected counter len {}", data.len()), + )))) + } + counter_data.copy_from_slice(&data); + let counter = u32::from_le_bytes(counter_data); + (counter_key, Some(counter)) + }, + None => (counter_key, None), + }) +} + +/// Commit a transaction to a KeyValueDB. +fn commit_impl>( + db: &dyn KeyValueDB, + transaction: Transaction, +) -> error::Result<()> { + let mut tx = DBTransaction::new(); + for change in transaction.0.into_iter() { + match change { + Change::Set(col, key, value) => tx.put_vec(col, &key, value), + Change::Remove(col, key) => tx.delete(col, &key), + Change::Store(col, key, value) => match read_counter(db, col, key.as_ref())? { + (counter_key, Some(mut counter)) => { + counter += 1; + tx.put(col, &counter_key, &counter.to_le_bytes()); + }, + (counter_key, None) => { + let d = 1u32.to_le_bytes(); + tx.put(col, &counter_key, &d); + tx.put_vec(col, key.as_ref(), value); + }, + }, + Change::Reference(col, key) => { + if let (counter_key, Some(mut counter)) = read_counter(db, col, key.as_ref())? { + counter += 1; + tx.put(col, &counter_key, &counter.to_le_bytes()); + } + }, + Change::Release(col, key) => { + if let (counter_key, Some(mut counter)) = read_counter(db, col, key.as_ref())? { + counter -= 1; + if counter == 0 { + tx.delete(col, &counter_key); + tx.delete(col, key.as_ref()); + } else { + tx.put(col, &counter_key, &counter.to_le_bytes()); + } + } + }, + } + } + db.write(tx).map_err(|e| error::DatabaseError(Box::new(e))) +} + +/// Wrap generic kvdb-based database into a trait object that implements [`Database`]. pub fn as_database(db: D) -> std::sync::Arc> where D: KeyValueDB + 'static, @@ -40,72 +108,28 @@ where std::sync::Arc::new(DbAdapter(db)) } -impl DbAdapter { - // Returns counter key and counter value if it exists. 
- fn read_counter(&self, col: ColumnId, key: &[u8]) -> error::Result<(Vec, Option)> { - // Add a key suffix for the counter - let mut counter_key = key.to_vec(); - counter_key.push(0); - Ok(match self.0.get(col, &counter_key).map_err(|e| error::DatabaseError(Box::new(e)))? { - Some(data) => { - let mut counter_data = [0; 4]; - if data.len() != 4 { - return Err(error::DatabaseError(Box::new(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Unexpected counter len {}", data.len()), - )))) - } - counter_data.copy_from_slice(&data); - let counter = u32::from_le_bytes(counter_data); - (counter_key, Some(counter)) - }, - None => (counter_key, None), - }) +impl> Database for DbAdapter { + fn commit(&self, transaction: Transaction) -> error::Result<()> { + commit_impl(&self.0, transaction) + } + + fn get(&self, col: ColumnId, key: &[u8]) -> Option> { + handle_err(self.0.get(col, key)) + } + + fn contains(&self, col: ColumnId, key: &[u8]) -> bool { + handle_err(self.0.has_key(col, key)) } } -impl> Database for DbAdapter { +/// RocksDB-specific adapter that implements `optimize_db_col` via `force_compact`. +#[cfg(feature = "rocksdb")] +pub struct RocksDbAdapter(kvdb_rocksdb::Database); + +#[cfg(feature = "rocksdb")] +impl> Database for RocksDbAdapter { fn commit(&self, transaction: Transaction) -> error::Result<()> { - let mut tx = DBTransaction::new(); - for change in transaction.0.into_iter() { - match change { - Change::Set(col, key, value) => tx.put_vec(col, &key, value), - Change::Remove(col, key) => tx.delete(col, &key), - Change::Store(col, key, value) => match self.read_counter(col, key.as_ref())? 
{ - (counter_key, Some(mut counter)) => { - counter += 1; - tx.put(col, &counter_key, &counter.to_le_bytes()); - }, - (counter_key, None) => { - let d = 1u32.to_le_bytes(); - tx.put(col, &counter_key, &d); - tx.put_vec(col, key.as_ref(), value); - }, - }, - Change::Reference(col, key) => { - if let (counter_key, Some(mut counter)) = - self.read_counter(col, key.as_ref())? - { - counter += 1; - tx.put(col, &counter_key, &counter.to_le_bytes()); - } - }, - Change::Release(col, key) => { - if let (counter_key, Some(mut counter)) = - self.read_counter(col, key.as_ref())? - { - counter -= 1; - if counter == 0 { - tx.delete(col, &counter_key); - tx.delete(col, key.as_ref()); - } else { - tx.put(col, &counter_key, &counter.to_le_bytes()); - } - } - }, - } - } - self.0.write(tx).map_err(|e| error::DatabaseError(Box::new(e))) + commit_impl(&self.0, transaction) } fn get(&self, col: ColumnId, key: &[u8]) -> Option> { @@ -115,4 +139,17 @@ impl> Database for DbAdapter { fn contains(&self, col: ColumnId, key: &[u8]) -> bool { handle_err(self.0.has_key(col, key)) } + + fn optimize_db_col(&self, col: ColumnId) -> error::Result<()> { + self.0.force_compact(col).map_err(|e| error::DatabaseError(Box::new(e))) + } +} + +/// Wrap a RocksDB database into a trait object with `optimize_db_col` support. +#[cfg(feature = "rocksdb")] +pub fn as_rocksdb_database(db: kvdb_rocksdb::Database) -> std::sync::Arc> +where + H: Clone + AsRef<[u8]>, +{ + std::sync::Arc::new(RocksDbAdapter(db)) } diff --git a/substrate/primitives/database/src/lib.rs b/substrate/primitives/database/src/lib.rs index 42920bbefb499..1f578349eaff7 100644 --- a/substrate/primitives/database/src/lib.rs +++ b/substrate/primitives/database/src/lib.rs @@ -22,6 +22,8 @@ mod kvdb; mod mem; pub use crate::kvdb::as_database; +#[cfg(feature = "rocksdb")] +pub use crate::kvdb::as_rocksdb_database; pub use mem::MemDb; /// An identifier for a column. 
@@ -117,6 +119,11 @@ pub trait Database>: Send + Sync { /// /// Not all database implementations use a prefix for keys, so this function may be a noop. fn sanitize_key(&self, _key: &mut Vec) {} + + /// Optimize a database column. The default implementation is a no-op that returns `Ok(())`. + fn optimize_db_col(&self, _col: ColumnId) -> error::Result<()> { + Ok(()) + } } impl std::fmt::Debug for dyn Database {