diff --git a/include/rocksdb/sst_file_reader.h b/include/rocksdb/sst_file_reader.h index dca5a8f03ac..4e5cda130a2 100644 --- a/include/rocksdb/sst_file_reader.h +++ b/include/rocksdb/sst_file_reader.h @@ -32,11 +32,17 @@ class SstFileReader { std::shared_ptr GetTableProperties() const; // Verifies whether there is corruption in this table. + // For the default BlockBasedTable, this will verify the block + // checksum of each block. Status VerifyChecksum(const ReadOptions& /*read_options*/); // TODO: plumb Env::IOActivity, Env::IOPriority Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } + // Verify that the number of entries in the table matches table property. + // A Corruption status is returned if they do not match. + Status VerifyNumEntries(const ReadOptions& /*read_options*/); + private: struct Rep; std::unique_ptr rep_; diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 0ad0d04bfa1..ebc44a20fd1 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -565,11 +565,14 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num_limit, // TODO: verify num_range_deletions if (i != table_properties_->num_entries - table_properties_->num_range_deletions) { - ret = - Status::Corruption("Table property has num_entries = " + - std::to_string(table_properties_->num_entries) + - " but scanning the table returns " + - std::to_string(i) + " records."); + std::ostringstream oss; + oss << "Table property expects " + << table_properties_->num_entries - + table_properties_->num_range_deletions + << " entries when excluding range deletions," + << " but scanning the table returned " << std::to_string(i) + << " entries"; + ret = Status::Corruption(oss.str()); } } } diff --git a/table/sst_file_reader.cc b/table/sst_file_reader.cc index 39da7038b2c..da50ff037c8 100644 --- a/table/sst_file_reader.cc +++ b/table/sst_file_reader.cc @@ -105,4 +105,40 @@ Status SstFileReader::VerifyChecksum(const ReadOptions& read_options) { TableReaderCaller::kSSTFileReader); } +Status SstFileReader::VerifyNumEntries(const ReadOptions& read_options) { + Rep* r = rep_.get(); + std::unique_ptr internal_iter{r->table_reader->NewIterator( + read_options, r->moptions.prefix_extractor.get(), nullptr, + false /* skip_filters */, TableReaderCaller::kSSTFileReader)}; + internal_iter->SeekToFirst(); + Status s = internal_iter->status(); + if (!s.ok()) { + return s; + } + uint64_t num_read = 0; + for (; internal_iter->Valid(); internal_iter->Next()) { + ++num_read; + }; + s = internal_iter->status(); + if (!s.ok()) { + return s; + } + std::shared_ptr tp = GetTableProperties(); + if (!tp) { + s = Status::Corruption("table properties not available"); + } else { + // TODO: verify num_range_deletions + uint64_t expected = tp->num_entries - tp->num_range_deletions; + if (num_read != expected) { + std::ostringstream oss; + oss << "Table property expects " << expected + << " entries when excluding range deletions," + << " but scanning the table returned " << std::to_string(num_read) + << " entries"; + s = Status::Corruption(oss.str()); + } + } + return s; +} + } // namespace ROCKSDB_NAMESPACE diff --git a/table/sst_file_reader_test.cc b/table/sst_file_reader_test.cc index b5e00caded5..597909925a9 100644 --- a/table/sst_file_reader_test.cc +++ b/table/sst_file_reader_test.cc @@ -527,6 +527,57 @@ TEST_F(SstFileReaderTimestampNotPersistedTest, IncompatibleTimestampFormat) { .IsInvalidArgument()); } +TEST_F(SstFileReaderTest, VerifyNumEntriesBasic) { + std::vector keys; + for (uint64_t i = 0; i < kNumKeys; i++) { + keys.emplace_back(EncodeAsUint64(i)); + } + CreateFile(sst_name_, keys); + SstFileReader reader(options_); + ASSERT_OK(reader.Open(sst_name_)); + ASSERT_OK(reader.VerifyNumEntries(ReadOptions())); +} + +TEST_F(SstFileReaderTest, VerifyNumEntriesDeleteRange) { + SstFileWriter writer(soptions_, options_); + ASSERT_OK(writer.Open(sst_name_)); + + for (uint64_t i = 0; i < kNumKeys; i++) { + ASSERT_OK(writer.Put(EncodeAsUint64(i), EncodeAsUint64(i + 1))); + } + ASSERT_OK( + writer.DeleteRange(EncodeAsUint64(0), EncodeAsUint64(kNumKeys / 2))); + ASSERT_OK(writer.Finish()); + SstFileReader reader(options_); + ASSERT_OK(reader.Open(sst_name_)); + ASSERT_OK(reader.VerifyNumEntries(ReadOptions())); +} + +TEST_F(SstFileReaderTest, VerifyNumEntriesCorruption) { + const int num_keys = 99; + const int corrupted_num_keys = num_keys + 2; + SyncPoint::GetInstance()->SetCallBack( + "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { + TableProperties* props = reinterpret_cast(arg); + props->num_entries = corrupted_num_keys; + }); + SyncPoint::GetInstance()->EnableProcessing(); + std::vector keys; + for (uint64_t i = 0; i < num_keys; i++) { + keys.emplace_back(EncodeAsUint64(i)); + } + CreateFile(sst_name_, keys); + SstFileReader reader(options_); + ASSERT_OK(reader.Open(sst_name_)); + Status s = reader.VerifyNumEntries(ReadOptions()); + ASSERT_TRUE(s.IsCorruption()); + std::ostringstream oss; + oss << "Table property expects " << corrupted_num_keys + << " entries when excluding range deletions," + << " but scanning the table returned " << num_keys << " entries"; + ASSERT_TRUE(std::strstr(oss.str().c_str(), s.getState())); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index 53721e33022..a5c567b38af 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -583,8 +583,8 @@ TEST_F(SSTDumpToolTest, SstFileDumperVerifyNumRecords) { /*has_to=*/false, /*to_key=*/""); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE( - std::strstr("Table property has num_entries = 1026 but scanning the " - "table returns 1024 records.", + std::strstr("Table property expects 1026 entries when excluding range " + "deletions, but scanning the table returned 1024 entries", s.getState())); // Validation is not performed when read_num, has_from, has_to are set