diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ad53464..3c9c6b9d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,6 +47,7 @@ set(ENABLE_MIMIC_VDSO off)
 option(BUILD_CURL_FROM_SOURCE "Compile static libcurl" off)
 option(ORIGIN_EXT2FS "Use original libext2fs" off)
 
+option(ENABLE_REWRITE_STATS "Enable block rewrite statistics tracking (for overlaybd-commit tool)" off)
 find_package(photon REQUIRED)
 find_package(tcmu REQUIRED)
 find_package(yamlcpp)
diff --git a/src/overlaybd/lsmt/CMakeLists.txt b/src/overlaybd/lsmt/CMakeLists.txt
index 24d5e5b1..44409505 100644
--- a/src/overlaybd/lsmt/CMakeLists.txt
+++ b/src/overlaybd/lsmt/CMakeLists.txt
@@ -5,6 +5,10 @@ target_include_directories(lsmt_lib PUBLIC
     ${PHOTON_INCLUDE_DIR}
 )
 
+if(ENABLE_REWRITE_STATS)
+    target_compile_definitions(lsmt_lib PRIVATE ENABLE_REWRITE_STATS)
+endif()
+
 if(BUILD_TESTING)
     add_subdirectory(test)
 endif()
diff --git a/src/overlaybd/lsmt/file.cpp b/src/overlaybd/lsmt/file.cpp
index 1a2af5b0..8aca4764 100644
--- a/src/overlaybd/lsmt/file.cpp
+++ b/src/overlaybd/lsmt/file.cpp
@@ -990,8 +990,12 @@ class LSMTFile : public LSMTReadOnlyFile {
         DataStat data_stat;
         data_stat.total_data_size = (buf.st_size - HeaderTrailer::SPACE);
         data_stat.valid_data_size = index()->block_count() * ALIGNMENT;
-        LOG_DEBUG("data_size: ` ( valid: ` )", data_stat.total_data_size,
-                  data_stat.valid_data_size);
+        // Get rewrite stats from the index
+        auto rw_stats = index()->rewrite_stats();
+        data_stat.total_blocks_written = rw_stats.total_blocks_written;
+        data_stat.rewritten_blocks = rw_stats.rewritten_blocks;
+        LOG_DEBUG("data_size: ` ( valid: ` ), rewrites: `/`", data_stat.total_data_size,
+                  data_stat.valid_data_size, data_stat.rewritten_blocks, data_stat.total_blocks_written);
         return data_stat;
     }
 
diff --git a/src/overlaybd/lsmt/file.h b/src/overlaybd/lsmt/file.h
index 1fc7b5bc..bb11bc99 100644
--- a/src/overlaybd/lsmt/file.h
+++ b/src/overlaybd/lsmt/file.h
@@ -98,6 +98,9 @@ class IFileRW : public IFileRO {
     struct DataStat {
         uint64_t total_data_size = -1; // size of total data
         uint64_t valid_data_size = -1; // size of valid data (excluding garbage)
+        // Block rewrite telemetry (in 512B block units)
+        uint64_t total_blocks_written = 0; // Total blocks written
+        uint64_t rewritten_blocks = 0;     // Blocks that overwrote previous data
     };
     virtual DataStat data_stat() const = 0;
 
diff --git a/src/overlaybd/lsmt/index.cpp b/src/overlaybd/lsmt/index.cpp
index 0b4cee43..ad93cdd8 100644
--- a/src/overlaybd/lsmt/index.cpp
+++ b/src/overlaybd/lsmt/index.cpp
@@ -256,6 +256,11 @@ class Index0 : public IComboIndex {
         }
     } alloc_blk;
 
+#ifdef ENABLE_REWRITE_STATS
+    // Block rewrite counters for telemetry (same type that rewrite_stats() returns)
+    RewriteStats m_rewrite_stats;
+#endif
+
     // Index0(const set &mapping) : mapping(mapping){};
 
     Index0(const SegmentMapping *pmappings = nullptr, size_t n = 0) {
@@ -272,19 +277,25 @@ class Index0 : public IComboIndex {
     virtual const SegmentMapping *buffer() const override {
         return nullptr;
     }
-    iterator remove_partial_overlap(iterator it, uint64_t offset, uint32_t length) {
+    iterator remove_partial_overlap(iterator it, uint64_t offset, uint32_t length, uint64_t *rewritten_out = nullptr) {
         auto nx = next(it);
         auto end = offset + length;
         auto p = (SegmentMapping *)&*it;
+        uint64_t rewritten = 0;
+
         if (p->offset < offset) // p->offset < offset < p->end() < end
         {
             assert(p->end() > offset);
             alloc_blk -= *p;
             if (p->end() <= end) {
+                // Overlap from offset to p->end()
+                rewritten = p->end() - offset;
                 p->backward_end_to(offset);
                 alloc_blk += *p;
             } else // if (p->end() > end) // m lies in *p
             {
+                // Complete overlap - the entire new mapping overlaps with this segment
+                rewritten = length;
                 SegmentMapping nm = *p;
                 nm.forward_offset_to(end);
                 p->backward_end_to(offset); // shrink first,
@@ -296,13 +307,21 @@ class Index0 : public IComboIndex {
             alloc_blk -= *p;
             if (p->end() <= end) // included by [offset, end)
             {
+                // Entire segment is overwritten
+                rewritten = p->length;
                 mapping.erase(it);
             } else // (p->end() > end)
             {
+                // Overlap from p->offset to end
+                rewritten = end - p->offset;
                 p->forward_offset_to(end);
                 alloc_blk += *p;
             }
         }
+
+        if (rewritten_out) {
+            *rewritten_out += rewritten;
+        }
         return nx;
     }
     iterator prev(iterator it) const {
@@ -314,6 +333,16 @@ class Index0 : public IComboIndex {
     virtual void insert(SegmentMapping m) override {
         if (m.length == 0)
             return;
+
+#ifdef ENABLE_REWRITE_STATS
+        // Track total blocks written for rewrite telemetry
+        m_rewrite_stats.total_blocks_written += m.length;
+        // Overwritten blocks are accumulated directly into the counter
+        uint64_t *rewritten = &m_rewrite_stats.rewritten_blocks;
+#else
+        uint64_t *rewritten = nullptr; // stats compiled out, nothing to count
+#endif
+
         alloc_blk += m;
         auto it = mapping.lower_bound(m);
         if (it == mapping.end()) {
@@ -321,14 +350,17 @@ class Index0 : public IComboIndex {
             return;
         }
 
-        it = remove_partial_overlap(it, m.offset, m.length); // first one (there must be)
+        it = remove_partial_overlap(it, m.offset, m.length, rewritten); // first one (there must be)
         assert(it == mapping.end() || it->offset > m.offset);
         while (it != mapping.end() && it->offset < m.end()) {
             if (it->end() <= m.end()) {
+                // Entire segment is overwritten
+                if (rewritten)
+                    *rewritten += it->length;
                 alloc_blk -= *it;
                 it = mapping.erase(it); // middle ones, if there are
             } else {
-                it = remove_partial_overlap(it, m.offset, m.length); // last one, if there is
+                it = remove_partial_overlap(it, m.offset, m.length, rewritten); // last one, if there is
                 break;
             }
         }
@@ -369,6 +401,16 @@ class Index0 : public IComboIndex {
         return alloc_blk.m_alloc;
     }
 
+    // Block rewrite counters gathered by insert(); all zero when the
+    // ENABLE_REWRITE_STATS build option is off
+    virtual RewriteStats rewrite_stats() const override {
+#ifdef ENABLE_REWRITE_STATS
+        return m_rewrite_stats;
+#else
+        return RewriteStats{};
+#endif
+    }
+
     // returns the first and last mapping in the index
     // the there's no one, return an invalid mapping: [INVALID_OFFSET, 0) ==> 0
     virtual SegmentMapping front() const override {
diff --git a/src/overlaybd/lsmt/index.h b/src/overlaybd/lsmt/index.h
index fc47ec00..f7a61a0c 100644
--- a/src/overlaybd/lsmt/index.h
+++ b/src/overlaybd/lsmt/index.h
@@ -132,6 +132,12 @@ class IMemoryIndex {
     virtual IMemoryIndex *make_read_only_index() const = 0;
 };
 
+// Block rewrite statistics for telemetry
+struct RewriteStats {
+    uint64_t total_blocks_written = 0; // Total blocks written (in 512B units)
+    uint64_t rewritten_blocks = 0;     // Blocks that overwrote previous data
+};
+
 // the level 0 memory index, which supports write
 class IMemoryIndex0 : public IMemoryIndex {
 public:
@@ -142,6 +148,12 @@ class IMemoryIndex0 : public IMemoryIndex {
     // memory allocation is aligned to the `alignment`
     virtual SegmentMapping *dump(size_t alignment = 0) const = 0;
     // virtual IMemoryIndex *make_read_only_index() const = 0;
+
+    // Get block rewrite statistics for telemetry; the default reports zeros
+    // so index implementations that do not track rewrites keep compiling
+    virtual RewriteStats rewrite_stats() const {
+        return RewriteStats{};
+    }
 };
 
 class IComboIndex : public IMemoryIndex0 {
diff --git a/src/tools/overlaybd-commit.cpp b/src/tools/overlaybd-commit.cpp
index 61a3a742..e52e3a7c 100644
--- a/src/tools/overlaybd-commit.cpp
+++ b/src/tools/overlaybd-commit.cpp
@@ -188,6 +188,17 @@ int main(int argc, char **argv) {
         out = fout;
     }
 
+    // Get data stats before commit for telemetry (includes rewrite frequency)
+    auto stats = fin->data_stat();
+    LOG_INFO("data_stat: total_data_size=`, valid_data_size=`, total_blocks_written=`, rewritten_blocks=`",
+             stats.total_data_size, stats.valid_data_size,
+             stats.total_blocks_written, stats.rewritten_blocks);
+
+    // Output rewrite stats as JSON to stdout for the caller to parse
+    // Format: {"total_blocks_written": N, "rewritten_blocks": M}
+    printf("{\"total_blocks_written\": %" PRIu64 ", \"rewritten_blocks\": %" PRIu64 "}\n",
+           stats.total_blocks_written, stats.rewritten_blocks);
+
     CommitArgs args(out);
     if (!uuid.empty()) {
         memset(args.uuid.data, 0, UUID::String::LEN);