Skip to content

Commit b81e93d

Browse files
committed
add object id type
1 parent 5c6b9a6 commit b81e93d

File tree

10 files changed

+127
-28
lines changed

10 files changed

+127
-28
lines changed

proto/hummock.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ enum DistanceType {
187187

188188
// Metadata describing a single vector file object.
message VectorFileInfo {
189189
// Id of the vector file object.
uint64 object_id = 1;
190+
// Size of the vector file; presumably in bytes — TODO confirm the unit.
uint64 file_size = 2;
190191
}
191192

192193
message FlatIndexConfig {}
@@ -291,6 +292,7 @@ message HummockVersionCheckpoint {
291292
// Objects that became stale, split by object kind.
message StaleObjects {
292293
// Ids of stale SSTable objects; readers map these to `HummockObjectId::Sstable`.
repeated uint64 id = 1;
293294
// Summed file size of the stale objects.
// NOTE(review): presumably covers both SSTable and vector file objects — confirm against the writer.
uint64 total_file_size = 2;
295+
// Ids of stale vector file objects; readers map these to `HummockObjectId::VectorFile`.
repeated uint64 vector_file_ids = 3;
294296
}
295297
HummockVersion version = 1;
296298
map<uint64, StaleObjects> stale_objects = 2;

src/ctl/src/cmd_impl/hummock/sst_dump.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,16 @@ pub async fn sst_dump(context: &CtlContext, args: SstDumpArgs) -> anyhow::Result
142142
while let Some(obj) = metadata_iter.try_next().await? {
143143
print_object(&obj);
144144
let obj_id = SstableStore::get_object_id_from_path(&obj.key);
145-
let HummockObjectId::Sstable(obj_id) = obj_id;
145+
let obj_id = match obj_id {
146+
HummockObjectId::Sstable(obj_id) => obj_id,
147+
HummockObjectId::VectorFile(_) => {
148+
println!(
149+
"object id {:?} not a sstable object id: {}. skip",
150+
obj_id, obj.key
151+
);
152+
continue;
153+
}
154+
};
146155
let meta_offset =
147156
get_meta_offset_from_object(&obj, sstable_store.store().as_ref()).await?;
148157
sst_dump_via_sstable_store(

src/meta/src/backup_restore/backup_manager.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,16 @@ impl BackupManager {
329329
// the compiler will warn us if we forget to handle it here.
330330
match HummockObjectId::Sstable(0.into()) {
331331
HummockObjectId::Sstable(_) => {}
332+
HummockObjectId::VectorFile(_) => {}
332333
};
333334
s.ssts
334335
.iter()
335336
.map(|sst_id| HummockObjectId::Sstable(*sst_id))
337+
.chain(
338+
s.vector_files
339+
.iter()
340+
.map(|vector_file_id| HummockObjectId::VectorFile(*vector_file_id)),
341+
)
336342
})
337343
.collect()
338344
}

src/meta/src/hummock/manager/checkpoint.rs

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,14 @@ impl HummockManager {
153153
.hummock_version_deltas
154154
.range((Excluded(old_checkpoint_id), Included(new_checkpoint_id)))
155155
{
156+
// DO NOT REMOVE THIS LINE
157+
// This is to ensure that when adding new variant to `HummockObjectId`,
158+
// the compiler will warn us if we forget to handle it here.
159+
match HummockObjectId::Sstable(0.into()) {
160+
HummockObjectId::Sstable(_) => {}
161+
HummockObjectId::VectorFile(_) => {}
162+
};
163+
// TODO: unify the for-loop logic with HummockVersionDelta::newly_added_object_ids
156164
for group_deltas in version_delta.group_deltas.values() {
157165
object_sizes.extend(
158166
group_deltas
@@ -187,17 +195,15 @@ impl HummockManager {
187195
.map(|t| (t.object_id, t.file_size))
188196
}),
189197
)
190-
.map(|(object_id, size)| {
191-
// DO NOT REMOVE THIS LINE
192-
// This is to ensure that when adding new variant to `HummockObjectId`,
193-
// the compiler will warn us if we forget to handle it here.
194-
match HummockObjectId::Sstable(0.into()) {
195-
HummockObjectId::Sstable(_) => {}
196-
};
197-
(HummockObjectId::Sstable(object_id), size)
198-
}),
198+
.map(|(object_id, size)| (HummockObjectId::Sstable(object_id), size)),
199199
);
200200
}
201+
object_sizes.extend(
202+
version_delta
203+
.vector_index_delta
204+
.values()
205+
.flat_map(|index_delta| index_delta.newly_added_objects()),
206+
);
201207
versions_object_ids.extend(version_delta.newly_added_object_ids(false));
202208
}
203209

@@ -212,19 +218,23 @@ impl HummockManager {
212218
})
213219
})
214220
.sum::<u64>();
215-
stale_objects.insert(
216-
current_version.id,
221+
stale_objects.insert(current_version.id, {
222+
let mut sst_ids = vec![];
223+
let mut vector_file_ids = vec![];
224+
for object_id in removed_object_ids {
225+
match object_id {
226+
HummockObjectId::Sstable(sst_id) => sst_ids.push(sst_id.inner()),
227+
HummockObjectId::VectorFile(vector_file_id) => {
228+
vector_file_ids.push(vector_file_id.inner())
229+
}
230+
}
231+
}
217232
StaleObjects {
218-
id: removed_object_ids
219-
.into_iter()
220-
.map(|object_id| {
221-
let HummockObjectId::Sstable(sst_id) = object_id;
222-
sst_id.inner()
223-
})
224-
.collect(),
233+
id: sst_ids,
225234
total_file_size,
226-
},
227-
);
235+
vector_file_ids,
236+
}
237+
});
228238
if self.env.opts.enable_hummock_data_archive {
229239
archive = Some(PbHummockVersionArchive {
230240
version: Some(PbHummockVersion::from(&old_checkpoint.version)),

src/meta/src/hummock/manager/time_travel.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,9 @@ impl HummockManager {
280280
let mut result: HashSet<_> = objects.collect();
281281
let mut remain_sst: VecDeque<_> = result
282282
.iter()
283-
.map(|object_id| {
284-
let HummockObjectId::Sstable(sst_id) = object_id;
285-
*sst_id
283+
.filter_map(|object_id| match object_id {
284+
HummockObjectId::Sstable(sst_id) => Some(*sst_id),
285+
HummockObjectId::VectorFile(_) => None,
286286
})
287287
.collect();
288288
while !remain_sst.is_empty() {
@@ -303,6 +303,9 @@ impl HummockManager {
303303
// the compiler will warn us if we forget to handle it here.
304304
match HummockObjectId::Sstable(0.into()) {
305305
HummockObjectId::Sstable(_) => {}
306+
// Intentionally empty: this match is only a compile-time exhaustiveness guard.
// NOTE(review): the code right after this guard still builds only
// `HummockObjectId::Sstable` ids — confirm vector files need no handling here.
HummockObjectId::VectorFile(_) => {}
306309
};
307310
let reject: u64 = reject.try_into().unwrap();
308311
let object_id = HummockObjectId::Sstable(HummockSstableObjectId::from(reject));

src/storage/backup/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use itertools::Itertools;
3434
use risingwave_common::RW_VERSION;
3535
use risingwave_hummock_sdk::state_table_info::StateTableInfo;
3636
use risingwave_hummock_sdk::version::HummockVersion;
37-
use risingwave_hummock_sdk::{HummockSstableObjectId, HummockVersionId};
37+
use risingwave_hummock_sdk::{HummockSstableObjectId, HummockVectorFileId, HummockVersionId};
3838
use risingwave_pb::backup_service::{PbMetaSnapshotManifest, PbMetaSnapshotMetadata};
3939
use serde::{Deserialize, Serialize};
4040

@@ -52,6 +52,8 @@ pub struct MetaSnapshotMetadata {
5252
/// Bad naming, which won't be changed due to compatibility issue.
5353
pub ssts: HashSet<HummockSstableObjectId>,
5454
#[serde(default)]
55+
pub vector_files: HashSet<HummockVectorFileId>,
56+
#[serde(default)]
5557
pub format_version: u32,
5658
pub remarks: Option<String>,
5759
#[serde(default)]
@@ -70,6 +72,7 @@ impl MetaSnapshotMetadata {
7072
id,
7173
hummock_version_id: v.id,
7274
ssts: v.get_sst_object_ids(false),
75+
vector_files: v.get_vector_file_ids().collect(),
7376
format_version,
7477
remarks,
7578
state_table_info: v

src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ use crate::version::{
4343
IntraLevelDelta, IntraLevelDeltaCommon, ObjectIdReader, SstableIdReader,
4444
};
4545
use crate::{
46-
CompactionGroupId, HummockObjectId, HummockSstableId, HummockSstableObjectId, can_concat,
46+
CompactionGroupId, HummockObjectId, HummockSstableId, HummockSstableObjectId,
47+
HummockVectorFileId, can_concat,
4748
};
4849

4950
#[derive(Debug, Clone, Default)]
@@ -972,15 +973,23 @@ where
972973
.collect()
973974
}
974975

976+
/// Iterates over the ids of all vector files referenced by `self.vector_indexes`.
///
/// Each index contributes the ids yielded by its `get_objects()`; the file size
/// that `get_objects()` reports alongside each id is dropped. No deduplication
/// is performed here.
pub fn get_vector_file_ids(&self) -> impl Iterator<Item = HummockVectorFileId> + '_ {
977+
self.vector_indexes
978+
.values()
979+
.flat_map(|index| index.get_objects().map(|(object_id, _)| object_id))
980+
}
981+
975982
/// Collects the ids of every object referenced by `self` into a set.
///
/// The result is the union of:
/// - the SST objects reported by `get_sst_infos(exclude_change_log)`, wrapped as
///   `HummockObjectId::Sstable`, and
/// - the vector files reported by `get_vector_file_ids()`, wrapped as
///   `HummockObjectId::VectorFile`.
///
/// `exclude_change_log` is forwarded unchanged to `get_sst_infos`; it has no
/// effect on the vector file part.
pub fn get_object_ids(&self, exclude_change_log: bool) -> HashSet<HummockObjectId> {
976983
// DO NOT REMOVE THIS LINE
977984
// This is to ensure that when adding new variant to `HummockObjectId`,
978985
// the compiler will warn us if we forget to handle it here.
979986
match HummockObjectId::Sstable(0.into()) {
980987
HummockObjectId::Sstable(_) => {}
988+
HummockObjectId::VectorFile(_) => {}
981989
};
982990
self.get_sst_infos(exclude_change_log)
983991
.map(|s| HummockObjectId::Sstable(s.object_id()))
992+
// Vector files are appended after the SST objects; `collect()` builds the set.
.chain(self.get_vector_file_ids().map(HummockObjectId::VectorFile))
984993
.collect()
985994
}
986995

@@ -1456,6 +1465,7 @@ pub fn object_size_map(version: &HummockVersion) -> HashMap<HummockObjectId, u64
14561465
// the compiler will warn us if we forget to handle it here.
14571466
match HummockObjectId::Sstable(0.into()) {
14581467
HummockObjectId::Sstable(_) => {}
1468+
HummockObjectId::VectorFile(_) => {}
14591469
};
14601470
version
14611471
.levels
@@ -1476,6 +1486,11 @@ pub fn object_size_map(version: &HummockVersion) -> HashMap<HummockObjectId, u64
14761486
})
14771487
}))
14781488
.map(|(object_id, size)| (HummockObjectId::Sstable(object_id), size))
1489+
.chain(version.vector_indexes.values().flat_map(|index| {
1490+
index
1491+
.get_objects()
1492+
.map(|(vector_file_id, size)| (HummockObjectId::VectorFile(vector_file_id), size))
1493+
}))
14791494
.collect()
14801495
}
14811496

src/storage/hummock_sdk/src/lib.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ pub const FIRST_VERSION_ID: HummockVersionId = HummockVersionId(1);
239239
pub const SPLIT_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 1u64 << 56;
240240
pub const SINGLE_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 2u64 << 56;
241241
pub const SST_OBJECT_SUFFIX: &str = "data";
242+
pub const VECTOR_FILE_OBJECT_SUFFIX: &str = "vector";
242243
pub const HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH: usize = 20;
243244

244245
macro_rules! for_all_object_suffix {
@@ -250,7 +251,7 @@ macro_rules! for_all_object_suffix {
250251
)+
251252
}
252253

253-
pub const VALID_OBJECT_ID_SUFFIXES: [&str; 1] = [$(
254+
pub const VALID_OBJECT_ID_SUFFIXES: [&str; 2] = [$(
254255
$suffix
255256
),+];
256257

@@ -285,6 +286,7 @@ macro_rules! for_all_object_suffix {
285286
() => {
286287
for_all_object_suffix! {
287288
{Sstable, HummockSstableObjectId, SST_OBJECT_SUFFIX},
289+
{VectorFile, HummockVectorFileId, VECTOR_FILE_OBJECT_SUFFIX},
288290
}
289291
};
290292
}
@@ -299,11 +301,18 @@ pub fn get_stale_object_ids(
299301
// the compiler will warn us if we forget to handle it here.
300302
match HummockObjectId::Sstable(0.into()) {
301303
HummockObjectId::Sstable(_) => {}
304+
HummockObjectId::VectorFile(_) => {}
302305
};
303306
stale_objects
304307
.id
305308
.iter()
306309
.map(|sst_id| HummockObjectId::Sstable((*sst_id).into()))
310+
.chain(
311+
stale_objects
312+
.vector_file_ids
313+
.iter()
314+
.map(|vector_file_id| HummockObjectId::VectorFile((*vector_file_id).into())),
315+
)
307316
}
308317

309318
#[macro_export]

src/storage/hummock_sdk/src/vector_index.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,19 @@ use risingwave_pb::hummock::{
2525
PbVectorIndex, PbVectorIndexDelta, vector_index_delta,
2626
};
2727

28-
use crate::HummockVectorFileId;
28+
use crate::{HummockObjectId, HummockVectorFileId};
2929

3030
/// In-memory counterpart of `PbVectorFileInfo`, describing one vector file object.
#[derive(Clone, Debug, PartialEq)]
3131
pub struct VectorFileInfo {
3232
/// Id of the vector file object.
pub object_id: HummockVectorFileId,
33+
/// File size as carried in `PbVectorFileInfo::file_size`;
/// presumably in bytes — TODO confirm the unit.
pub file_size: u64,
3334
}
3435

3536
/// Converts the protobuf message into its in-memory form.
impl From<PbVectorFileInfo> for VectorFileInfo {
3637
/// Wraps the raw `object_id` via `Into` and copies `file_size` unchanged.
fn from(pb: PbVectorFileInfo) -> Self {
3738
Self {
3839
object_id: pb.object_id.into(),
40+
file_size: pb.file_size,
3941
}
4042
}
4143
}
@@ -44,6 +46,7 @@ impl From<VectorFileInfo> for PbVectorFileInfo {
4446
/// Converts the in-memory form back into the protobuf message: the id is
/// unwrapped with `inner()` and `file_size` is copied unchanged.
fn from(info: VectorFileInfo) -> Self {
4547
Self {
4648
object_id: info.object_id.inner(),
49+
file_size: info.file_size,
4750
}
4851
}
4952
}
@@ -107,6 +110,17 @@ pub struct VectorIndex {
107110
pub inner: VectorIndexImpl,
108111
}
109112

113+
impl VectorIndex {
114+
/// Iterates over `(vector file id, file size)` pairs for every vector file
/// held by this index.
///
/// For `VectorIndexImpl::Flat` the pairs come from `flat.vector_files`, in
/// that collection's iteration order.
pub fn get_objects(&self) -> impl Iterator<Item = (HummockVectorFileId, u64)> + '_ {
115+
// No wildcard arm: a new `VectorIndexImpl` variant will not compile until
// it is handled here.
match &self.inner {
116+
VectorIndexImpl::Flat(flat) => flat
117+
.vector_files
118+
.iter()
119+
.map(|file| (file.object_id, file.file_size)),
120+
}
121+
}
122+
}
123+
110124
impl From<PbVectorIndex> for VectorIndex {
111125
fn from(pb: PbVectorIndex) -> Self {
112126
Self {
@@ -186,6 +200,24 @@ impl From<PbVectorIndexDelta> for VectorIndexDelta {
186200
}
187201
}
188202

203+
impl VectorIndexDelta {
204+
/// Iterates over `(object id, file size)` pairs for the vector files that
/// this delta adds.
///
/// - `Init` yields nothing.
/// - `Adds` yields every entry of `added_vector_files` of each
///   `VectorIndexAdd::Flat`, with the id wrapped as
///   `HummockObjectId::VectorFile`.
pub fn newly_added_objects(&self) -> impl Iterator<Item = (HummockObjectId, u64)> + '_ {
205+
// Both arms evaluate to an `Option` of the same iterator type so the match
// has a single type; it is flattened into a plain iterator below.
match self {
206+
VectorIndexDelta::Init(_) => None,
207+
VectorIndexDelta::Adds(adds) => Some(adds.iter().flat_map(|add| {
208+
match add {
209+
VectorIndexAdd::Flat(add) => add
210+
.added_vector_files
211+
.iter()
212+
.map(|file| (HummockObjectId::VectorFile(file.object_id), file.file_size)),
213+
}
214+
})),
215+
}
216+
// `None` becomes an empty iterator; `Some(iter)` yields the items of `iter`.
.into_iter()
217+
.flatten()
218+
}
219+
}
220+
189221
impl From<VectorIndexDelta> for PbVectorIndexDelta {
190222
fn from(delta: VectorIndexDelta) -> Self {
191223
match delta {

src/storage/hummock_sdk/src/version.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,9 +599,19 @@ where
599599
// the compiler will warn us if we forget to handle it here.
600600
match HummockObjectId::Sstable(0.into()) {
601601
HummockObjectId::Sstable(_) => {}
602+
HummockObjectId::VectorFile(_) => {}
602603
};
603604
self.newly_added_sst_infos(exclude_table_change_log)
604605
.map(|sst| HummockObjectId::Sstable(sst.object_id()))
606+
.chain(
607+
self.vector_index_delta
608+
.values()
609+
.flat_map(|vector_index_delta| {
610+
vector_index_delta
611+
.newly_added_objects()
612+
.map(|(object_id, _)| object_id)
613+
}),
614+
)
605615
.collect()
606616
}
607617

0 commit comments

Comments
 (0)