Skip to content

Commit e71db77

Browse files
committed
chore: add logging for meta garbage collection operations
- Return approximate count of removed metadata keys from gc_drop_tables - Add extra logging throughout GC process for better observability - Update catalog interface to support metadata removal tracking - Note: DeleteByPrefix operations count as 1 but may remove multiple keys (currenyly not used in meta GC)
1 parent fe4636b commit e71db77

File tree

6 files changed

+90
-30
lines changed

6 files changed

+90
-30
lines changed

src/meta/api/src/garbage_collection_api.rs

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use databend_common_meta_app::tenant::Tenant;
4242
use databend_common_meta_kvapi::kvapi;
4343
use databend_common_meta_kvapi::kvapi::DirName;
4444
use databend_common_meta_kvapi::kvapi::Key;
45+
use databend_common_meta_types::txn_op::Request;
4546
use databend_common_meta_types::txn_op_response::Response;
4647
use databend_common_meta_types::MetaError;
4748
use databend_common_meta_types::TxnRequest;
@@ -77,19 +78,26 @@ where
7778
Self: Send + Sync,
7879
Self: kvapi::KVApi<Error = MetaError>,
7980
{
81+
/// Garbage collect dropped tables.
82+
///
83+
/// Returns the approximate number of metadata keys removed.
84+
/// Note: DeleteByPrefix operations count as 1 but may remove multiple keys.
8085
#[fastrace::trace]
81-
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<(), KVAppError> {
86+
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<usize, KVAppError> {
87+
let mut num_meta_key_removed = 0;
8288
for drop_id in req.drop_ids {
8389
match drop_id {
8490
DroppedId::Db { db_id, db_name } => {
85-
gc_dropped_db_by_id(self, db_id, &req.tenant, &req.catalog, db_name).await?
91+
num_meta_key_removed +=
92+
gc_dropped_db_by_id(self, db_id, &req.tenant, &req.catalog, db_name).await?
8693
}
8794
DroppedId::Table { name, id } => {
88-
gc_dropped_table_by_id(self, &req.tenant, &req.catalog, &name, &id).await?
95+
num_meta_key_removed +=
96+
gc_dropped_table_by_id(self, &req.tenant, &req.catalog, &name, &id).await?
8997
}
9098
}
9199
}
92-
Ok(())
100+
Ok(num_meta_key_removed)
93101
}
94102
}
95103

@@ -107,12 +115,15 @@ pub const ORPHAN_POSTFIX: &str = "orphan";
107115
///
108116
/// Dropped table can not be accessed by any query,
109117
/// so it is safe to remove all the copied files in multiple sub transactions.
118+
///
119+
/// Returns the number of copied file entries removed.
110120
async fn remove_copied_files_for_dropped_table(
111121
kv_api: &(impl kvapi::KVApi<Error = MetaError> + ?Sized),
112122
table_id: &TableId,
113-
) -> Result<(), MetaError> {
123+
) -> Result<usize, MetaError> {
114124
let batch_size = 1024;
115125

126+
let mut num_removed_copied_files = 0;
116127
// Loop until:
117128
// - all cleaned
118129
// - or table is removed from meta-service
@@ -122,7 +133,7 @@ async fn remove_copied_files_for_dropped_table(
122133

123134
let seq_meta = kv_api.get_pb(table_id).await?;
124135
let Some(seq_table_meta) = seq_meta else {
125-
return Ok(());
136+
return Ok(num_removed_copied_files);
126137
};
127138

128139
// TODO: enable this check. Currently when gc db, the table may not be dropped.
@@ -145,7 +156,7 @@ async fn remove_copied_files_for_dropped_table(
145156
let copied_files = key_stream.take(batch_size).try_collect::<Vec<_>>().await?;
146157

147158
if copied_files.is_empty() {
148-
return Ok(());
159+
return Ok(num_removed_copied_files);
149160
}
150161

151162
for copied_ident in copied_files.iter() {
@@ -163,6 +174,8 @@ async fn remove_copied_files_for_dropped_table(
163174
copied_files.display()
164175
);
165176

177+
num_removed_copied_files += copied_files.len();
178+
166179
// Txn failures are ignored for simplicity, since copied files kv pairs are put with ttl,
167180
// they will not be leaked permanently, will be cleaned eventually.
168181
send_txn(kv_api, txn).await?;
@@ -322,37 +335,45 @@ pub async fn get_history_tables_for_gc(
322335

323336
/// Permanently remove a dropped database from the meta-service.
324337
/// then remove all **dropped and non-dropped** tables in the database.
338+
///
339+
/// Returns the approximate number of metadata keys removed.
340+
/// Note: DeleteByPrefix operations count as 1 but may remove multiple keys.
325341
async fn gc_dropped_db_by_id(
326342
kv_api: &(impl GarbageCollectionApi + IndexApi + ?Sized),
327343
db_id: u64,
328344
tenant: &Tenant,
329345
catalog: &String,
330346
db_name: String,
331-
) -> Result<(), KVAppError> {
347+
) -> Result<usize, KVAppError> {
348+
let mut num_meta_keys_removed = 0;
332349
// List tables by tenant, db_id, table_name.
333350
let db_id_history_ident = DatabaseIdHistoryIdent::new(tenant, db_name.clone());
334351
let Some(seq_dbid_list) = kv_api.get_pb(&db_id_history_ident).await? else {
335-
return Ok(());
352+
info!("db_id_history_ident not found for db_id {}", db_id);
353+
return Ok(num_meta_keys_removed);
336354
};
337355

338356
let mut db_id_list = seq_dbid_list.data;
339357

340358
// If the db_id is not in the list, return.
341359
if db_id_list.id_list.remove_first(&db_id).is_none() {
342-
return Ok(());
360+
info!("db_id_history_ident of db_id {} is empty", db_id);
361+
return Ok(num_meta_keys_removed);
343362
}
344363

345364
let dbid = DatabaseId { db_id };
346365
let Some(seq_db_meta) = kv_api.get_pb(&dbid).await? else {
347-
return Ok(());
366+
info!("database meta of db_id {} is empty", db_id);
367+
return Ok(num_meta_keys_removed);
348368
};
349369

350370
if seq_db_meta.drop_on.is_none() {
351371
// If db is not marked as dropped, just ignore the gc request and return directly.
352372
// In subsequent KV transactions, we also verify that db_meta hasn't changed
353373
// to ensure we don't reclaim metadata of the given database that might have been
354374
// successfully undropped in a parallel operation.
355-
return Ok(());
375+
info!("database of db_id {} is not marked as dropped", db_id);
376+
return Ok(num_meta_keys_removed);
356377
}
357378

358379
// Mark database meta as gc_in_progress if necessary
@@ -397,13 +418,15 @@ async fn gc_dropped_db_by_id(
397418
};
398419
let dir_name = DirName::new_with_level(db_id_table_name, 1);
399420

421+
let mut num_db_id_table_name_keys_removed = 0;
400422
let batch_size = 1024;
401423
let key_stream = kv_api.list_pb_keys(&dir_name).await?;
402424
let mut chunks = key_stream.chunks(batch_size);
403425
while let Some(targets) = chunks.next().await {
404426
let mut txn = TxnRequest::default();
405427
use itertools::Itertools;
406428
let targets: Vec<DBIdTableName> = targets.into_iter().try_collect()?;
429+
num_db_id_table_name_keys_removed += targets.len();
407430
for target in &targets {
408431
txn.if_then.push(txn_op_del(target));
409432
}
@@ -423,11 +446,17 @@ async fn gc_dropped_db_by_id(
423446
);
424447
}
425448
}
449+
info!(
450+
"{} DbIdTableNames cleaned for database {}[{}]",
451+
num_db_id_table_name_keys_removed, db_name, db_id,
452+
);
453+
num_meta_keys_removed += num_db_id_table_name_keys_removed;
426454
}
427455

428456
let id_to_name = DatabaseIdToName { db_id };
429457
let Some(seq_name) = kv_api.get_pb(&id_to_name).await? else {
430-
return Ok(());
458+
info!("id_to_name not found for db_id {}", db_id);
459+
return Ok(num_meta_keys_removed);
431460
};
432461

433462
let table_history_ident = TableIdHistoryIdent {
@@ -444,7 +473,8 @@ async fn gc_dropped_db_by_id(
444473
for tb_id in table_history.id_list.iter() {
445474
let table_id_ident = TableId { table_id: *tb_id };
446475

447-
remove_copied_files_for_dropped_table(kv_api, &table_id_ident).await?;
476+
let num_removed_copied_files =
477+
remove_copied_files_for_dropped_table(kv_api, &table_id_ident).await?;
448478
let _ = remove_data_for_dropped_table(
449479
kv_api,
450480
tenant,
@@ -454,7 +484,7 @@ async fn gc_dropped_db_by_id(
454484
&mut txn,
455485
)
456486
.await?;
457-
remove_index_for_dropped_table(kv_api, tenant, &table_id_ident, &mut txn).await?;
487+
num_meta_keys_removed += num_removed_copied_files;
458488
}
459489

460490
txn.condition
@@ -481,24 +511,35 @@ async fn gc_dropped_db_by_id(
481511
.push(txn_cond_eq_seq(&id_to_name, seq_name.seq));
482512
txn.if_then.push(txn_op_del(&id_to_name));
483513

514+
// Count removed keys (approximate for DeleteByPrefix operations)
515+
for op in &txn.if_then {
516+
if let Some(Request::Delete(_) | Request::DeleteByPrefix(_)) = &op.request {
517+
num_meta_keys_removed += 1;
518+
}
519+
}
520+
484521
let _resp = kv_api.transaction(txn).await?;
485522

486-
Ok(())
523+
Ok(num_meta_keys_removed)
487524
}
488525

489526
/// Permanently remove a dropped table from the meta-service.
490527
///
491528
/// The data of the table should already have been removed before calling this method.
529+
///
530+
/// Returns the approximate number of metadata keys removed.
531+
/// Note: DeleteByPrefix operations count as 1 but may remove multiple keys.
492532
async fn gc_dropped_table_by_id(
493533
kv_api: &(impl GarbageCollectionApi + IndexApi + ?Sized),
494534
tenant: &Tenant,
495535
catalog: &String,
496536
db_id_table_name: &DBIdTableName,
497537
table_id_ident: &TableId,
498-
) -> Result<(), KVAppError> {
538+
) -> Result<usize, KVAppError> {
499539
// First remove all copied files for the dropped table.
500540
// These markers are not part of the table and can be removed in separate transactions.
501-
remove_copied_files_for_dropped_table(kv_api, table_id_ident).await?;
541+
let num_removed_copied_files =
542+
remove_copied_files_for_dropped_table(kv_api, table_id_ident).await?;
502543

503544
let mut trials = txn_backoff(None, func_name!());
504545
loop {
@@ -532,12 +573,22 @@ async fn gc_dropped_table_by_id(
532573
.await?;
533574

534575
// 3)
576+
535577
remove_index_for_dropped_table(kv_api, tenant, table_id_ident, &mut txn).await?;
536578

579+
// Count removed keys (approximate for DeleteByPrefix operations)
580+
let mut num_meta_keys_removed = 0;
581+
for op in &txn.if_then {
582+
if let Some(Request::Delete(_) | Request::DeleteByPrefix(_)) = &op.request {
583+
num_meta_keys_removed += 1;
584+
}
585+
}
586+
num_meta_keys_removed += num_removed_copied_files;
587+
537588
let (succ, _responses) = send_txn(kv_api, txn).await?;
538589

539590
if succ {
540-
return Ok(());
591+
return Ok(num_meta_keys_removed);
541592
}
542593
}
543594
}

src/query/catalog/src/catalog/interface.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,11 @@ pub trait Catalog: DynClone + Send + Sync + Debug {
324324
))
325325
}
326326

327-
async fn gc_drop_tables(&self, _req: GcDroppedTableReq) -> Result<()> {
327+
/// Garbage collect dropped tables and databases.
328+
///
329+
/// Returns the approximate number of metadata keys removed.
330+
/// Note: DeleteByPrefix operations count as 1 but may remove multiple keys.
331+
async fn gc_drop_tables(&self, _req: GcDroppedTableReq) -> Result<usize> {
328332
Err(ErrorCode::Unimplemented("'gc_drop_tables' not implemented"))
329333
}
330334

src/query/service/src/catalogs/default/database_catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ impl Catalog for DatabaseCatalog {
761761
self.mutable_catalog.get_drop_table_infos(req).await
762762
}
763763

764-
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<()> {
764+
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<usize> {
765765
self.mutable_catalog.gc_drop_tables(req).await
766766
}
767767

src/query/service/src/catalogs/default/mutable_catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ impl Catalog for MutableCatalog {
637637
Ok((tables, drop_ids))
638638
}
639639

640-
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<()> {
640+
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<usize> {
641641
let meta = self.ctx.meta.clone();
642642
let resp = meta.gc_drop_tables(req).await?;
643643
Ok(resp)

src/query/service/src/catalogs/default/session_catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ impl Catalog for SessionCatalog {
421421
self.inner.get_drop_table_infos(req).await
422422
}
423423

424-
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<()> {
424+
async fn gc_drop_tables(&self, req: GcDroppedTableReq) -> Result<usize> {
425425
self.inner.gc_drop_tables(req).await
426426
}
427427

src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,14 @@ impl VacuumDropTablesInterpreter {
5252
Ok(VacuumDropTablesInterpreter { ctx, plan })
5353
}
5454

55+
/// Vacuum metadata of dropped tables and databases.
56+
///
57+
/// Returns the approximate number of metadata keys removed.
5558
async fn gc_drop_tables(
5659
&self,
5760
catalog: Arc<dyn Catalog>,
5861
drop_ids: Vec<DroppedId>,
59-
) -> Result<()> {
62+
) -> Result<usize> {
6063
info!(
6164
"vacuum metadata of dropped table from db {:?}",
6265
self.plan.database,
@@ -83,6 +86,7 @@ impl VacuumDropTablesInterpreter {
8386

8487
let chunk_size = 50;
8588

89+
let mut num_meta_keys_removed = 0;
8690
// first gc drop table ids
8791
for c in drop_db_table_ids.chunks(chunk_size) {
8892
info!("vacuum drop {} table ids: {:?}", c.len(), c);
@@ -91,7 +95,7 @@ impl VacuumDropTablesInterpreter {
9195
catalog: self.plan.catalog.clone(),
9296
drop_ids: c.to_vec(),
9397
};
94-
catalog.gc_drop_tables(req).await?;
98+
num_meta_keys_removed += catalog.gc_drop_tables(req).await?;
9599
}
96100

97101
// then gc drop db ids
@@ -102,10 +106,10 @@ impl VacuumDropTablesInterpreter {
102106
catalog: self.plan.catalog.clone(),
103107
drop_ids: c.to_vec(),
104108
};
105-
catalog.gc_drop_tables(req).await?;
109+
num_meta_keys_removed += catalog.gc_drop_tables(req).await?;
106110
}
107111

108-
Ok(())
112+
Ok(num_meta_keys_removed)
109113
}
110114
}
111115

@@ -224,6 +228,7 @@ impl Interpreter for VacuumDropTablesInterpreter {
224228
.map(|id| *containing_db.get(id).unwrap())
225229
.collect::<HashSet<_>>();
226230

231+
let mut num_meta_keys_removed = 0;
227232
// gc metadata only when not dry run
228233
if self.plan.option.dry_run.is_none() {
229234
let mut success_dropped_ids = vec![];
@@ -253,15 +258,15 @@ impl Interpreter for VacuumDropTablesInterpreter {
253258
info!("failed table ids: {:?}", failed_tables);
254259
}
255260

256-
self.gc_drop_tables(catalog, success_dropped_ids).await?;
261+
num_meta_keys_removed = self.gc_drop_tables(catalog, success_dropped_ids).await?;
257262
}
258263

259264
let success_count = tables_count as u64 - failed_tables.len() as u64;
260265
let failed_count = failed_tables.len() as u64;
261266

262267
info!(
263-
"=== VACUUM DROP TABLE COMPLETED === success: {}, failed: {}, total: {}",
264-
success_count, failed_count, tables_count
268+
"=== VACUUM DROP TABLE COMPLETED === success: {}, failed: {}, total: {}, num_meta_keys_removed: {}",
269+
success_count, failed_count, tables_count, num_meta_keys_removed
265270
);
266271

267272
match files_opt {

0 commit comments

Comments
 (0)