Skip to content

Commit

Permalink
Allow SstFileReader to verify number of entries in SST files (faceboo…
Browse files Browse the repository at this point in the history
…k#12418)

Summary:
Add `SstFileReader::VerifyNumEntries()` for this purpose. I added the same functionality to `sst_dump` in facebook#12322. Since sst_file_reader.h is exposed to users while sst_dump.h is not, it seems more appropriate to add SST files related APIs here.

Pull Request resolved: facebook#12418

Test Plan: `./sst_file_reader_test --gtest_filter="*VerifyNumEntries*"`

Reviewed By: jowlyzhang

Differential Revision: D54764271

Pulled By: cbi42

fbshipit-source-id: 22ebfe04bbb0b152762cee13d4210b147b36d3e9
  • Loading branch information
cbi42 authored and facebook-github-bot committed Mar 12, 2024
1 parent c4d37da commit 36c1b0a
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 7 deletions.
6 changes: 6 additions & 0 deletions include/rocksdb/sst_file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@ class SstFileReader {
std::shared_ptr<const TableProperties> GetTableProperties() const;

// Verifies whether there is corruption in this table.
// For the default BlockBasedTable, this will verify the block
// checksum of each block.
Status VerifyChecksum(const ReadOptions& /*read_options*/);

// TODO: plumb Env::IOActivity, Env::IOPriority
Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }

// Verify that the number of entries in the table matches table property.
// A Corruption status is returned if they do not match.
Status VerifyNumEntries(const ReadOptions& /*read_options*/);

private:
struct Rep;
std::unique_ptr<Rep> rep_;
Expand Down
13 changes: 8 additions & 5 deletions table/sst_file_dumper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,14 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num_limit,
// TODO: verify num_range_deletions
if (i != table_properties_->num_entries -
table_properties_->num_range_deletions) {
ret =
Status::Corruption("Table property has num_entries = " +
std::to_string(table_properties_->num_entries) +
" but scanning the table returns " +
std::to_string(i) + " records.");
std::ostringstream oss;
oss << "Table property expects "
<< table_properties_->num_entries -
table_properties_->num_range_deletions
<< " entries when excluding range deletions,"
<< " but scanning the table returned " << std::to_string(i)
<< " entries";
ret = Status::Corruption(oss.str());
}
}
}
Expand Down
36 changes: 36 additions & 0 deletions table/sst_file_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,40 @@ Status SstFileReader::VerifyChecksum(const ReadOptions& read_options) {
TableReaderCaller::kSSTFileReader);
}

Status SstFileReader::VerifyNumEntries(const ReadOptions& read_options) {
Rep* r = rep_.get();
std::unique_ptr<InternalIterator> internal_iter{r->table_reader->NewIterator(
read_options, r->moptions.prefix_extractor.get(), nullptr,
false /* skip_filters */, TableReaderCaller::kSSTFileReader)};
internal_iter->SeekToFirst();
Status s = internal_iter->status();
if (!s.ok()) {
return s;
}
uint64_t num_read = 0;
for (; internal_iter->Valid(); internal_iter->Next()) {
++num_read;
};
s = internal_iter->status();
if (!s.ok()) {
return s;
}
std::shared_ptr<const TableProperties> tp = GetTableProperties();
if (!tp) {
s = Status::Corruption("table properties not available");
} else {
// TODO: verify num_range_deletions
uint64_t expected = tp->num_entries - tp->num_range_deletions;
if (num_read != expected) {
std::ostringstream oss;
oss << "Table property expects " << expected
<< " entries when excluding range deletions,"
<< " but scanning the table returned " << std::to_string(num_read)
<< " entries";
s = Status::Corruption(oss.str());
}
}
return s;
}

} // namespace ROCKSDB_NAMESPACE
51 changes: 51 additions & 0 deletions table/sst_file_reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,57 @@ TEST_F(SstFileReaderTimestampNotPersistedTest, IncompatibleTimestampFormat) {
.IsInvalidArgument());
}

TEST_F(SstFileReaderTest, VerifyNumEntriesBasic) {
std::vector<std::string> keys;
for (uint64_t i = 0; i < kNumKeys; i++) {
keys.emplace_back(EncodeAsUint64(i));
}
CreateFile(sst_name_, keys);
SstFileReader reader(options_);
ASSERT_OK(reader.Open(sst_name_));
ASSERT_OK(reader.VerifyNumEntries(ReadOptions()));
}

TEST_F(SstFileReaderTest, VerifyNumEntriesDeleteRange) {
SstFileWriter writer(soptions_, options_);
ASSERT_OK(writer.Open(sst_name_));

for (uint64_t i = 0; i < kNumKeys; i++) {
ASSERT_OK(writer.Put(EncodeAsUint64(i), EncodeAsUint64(i + 1)));
}
ASSERT_OK(
writer.DeleteRange(EncodeAsUint64(0), EncodeAsUint64(kNumKeys / 2)));
ASSERT_OK(writer.Finish());
SstFileReader reader(options_);
ASSERT_OK(reader.Open(sst_name_));
ASSERT_OK(reader.VerifyNumEntries(ReadOptions()));
}

TEST_F(SstFileReaderTest, VerifyNumEntriesCorruption) {
const int num_keys = 99;
const int corrupted_num_keys = num_keys + 2;
SyncPoint::GetInstance()->SetCallBack(
"PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
TableProperties* props = reinterpret_cast<TableProperties*>(arg);
props->num_entries = corrupted_num_keys;
});
SyncPoint::GetInstance()->EnableProcessing();
std::vector<std::string> keys;
for (uint64_t i = 0; i < num_keys; i++) {
keys.emplace_back(EncodeAsUint64(i));
}
CreateFile(sst_name_, keys);
SstFileReader reader(options_);
ASSERT_OK(reader.Open(sst_name_));
Status s = reader.VerifyNumEntries(ReadOptions());
ASSERT_TRUE(s.IsCorruption());
std::ostringstream oss;
oss << "Table property expects " << corrupted_num_keys
<< " entries when excluding range deletions,"
<< " but scanning the table returned " << num_keys << " entries";
ASSERT_TRUE(std::strstr(oss.str().c_str(), s.getState()));
}

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
4 changes: 2 additions & 2 deletions tools/sst_dump_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,8 @@ TEST_F(SSTDumpToolTest, SstFileDumperVerifyNumRecords) {
/*has_to=*/false, /*to_key=*/"");
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(
std::strstr("Table property has num_entries = 1026 but scanning the "
"table returns 1024 records.",
std::strstr("Table property expects 1026 entries when excluding range "
"deletions, but scanning the table returned 1024 entries",
s.getState()));

// Validation is not performed when read_num, has_from, has_to are set
Expand Down

0 comments on commit 36c1b0a

Please sign in to comment.