diff --git a/infra/indexer/index/sqlite.cc b/infra/indexer/index/sqlite.cc index 3169f6fef0eb..5cae35a8ca6a 100644 --- a/infra/indexer/index/sqlite.cc +++ b/infra/indexer/index/sqlite.cc @@ -16,8 +16,10 @@ #include #include // NOLINT +#include #include #include +#include #include "indexer/index/types.h" #include "absl/cleanup/cleanup.h" @@ -30,6 +32,13 @@ namespace oss_fuzz { namespace indexer { namespace { + +// Note: We could in principle enforce UNIQUE constraints on `reference` foreign +// key pairs, as well as those of `virtual_method_link` and +// `entity_translation_unit` (as an extreme, non-ID fields of e.g. `location` +// could also be made into a UNIQUE tuple). But those are unique by construction +// now and we hope to avoid the overhead of checking those constraints. + const char kCreateDb[] = "PRAGMA foreign_keys = ON;\n" "PRAGMA user_version = " SCHEMA_VERSION @@ -87,11 +96,33 @@ const char kCreateDb[] = "CREATE INDEX virtual_method_link_parent ON virtual_method_link(" " parent_entity_id);\n"; +const char kCreateIncrementalIndexingSupportTables[] = + "CREATE TABLE translation_unit(\n" + " id INTEGER PRIMARY KEY,\n" + " path TEXT);\n" + "\n" + "CREATE TABLE entity_translation_unit(\n" + " id INTEGER PRIMARY KEY,\n" + " entity_id INT NOT NULL,\n" + " tu_id INT NOT NULL,\n" + " FOREIGN KEY (entity_id) REFERENCES entity(id),\n" + " FOREIGN KEY (tu_id) REFERENCES translation_unit(id));\n" + "\n" + "CREATE TABLE reference_translation_unit(\n" + " id INTEGER PRIMARY KEY,\n" + " reference_id INT NOT NULL,\n" + " tu_id INT NOT NULL,\n" + " FOREIGN KEY (reference_id) REFERENCES reference(id),\n" + " FOREIGN KEY (tu_id) REFERENCES translation_unit(id));\n"; + const char kInsertLocation[] = "INSERT INTO location\n" " (id, dirname, basename, start_line, end_line)\n" " VALUES (?1, ?2, ?3, ?4, ?5);"; +const char kSelectLocations[] = + "SELECT dirname, basename, start_line, end_line FROM location ORDER BY id;"; + const char kInsertEntity[] = "INSERT INTO entity\n" " (id, kind, is_incomplete, name_prefix, name, name_suffix, location_id,\n" @@ -99,16 +130,55 @@ const char kInsertEntity[] = " virtual_method_kind)\n" " VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11);"; +const char kSelectEntities[] = + "SELECT kind, is_incomplete, name_prefix, name, name_suffix, " + " location_id, substitute_entity_id, substitute_relationship_kind, " + " enum_value, virtual_method_kind\n" + " FROM entity\n" + " ORDER BY id;"; + const char kInsertReference[] = "INSERT INTO reference\n" " (id, entity_id, location_id)\n" " VALUES (?1, ?2, ?3);"; +const char kSelectReferences[] = + "SELECT entity_id, location_id FROM reference ORDER BY id;"; + const char kInsertLink[] = "INSERT INTO virtual_method_link\n" " (id, parent_entity_id, child_entity_id)\n" " VALUES (?1, ?2, ?3);"; +const char kSelectLinks[] = + "SELECT parent_entity_id, child_entity_id\n" + " FROM virtual_method_link\n" + " ORDER BY id;"; + +const char kInsertTranslationUnit[] = + "INSERT INTO translation_unit\n" + " (id, path)\n" + " VALUES (?1, ?2);"; + +const char kSelectTranslationUnits[] = + "SELECT path FROM translation_unit ORDER BY id;"; + +const char kInsertEntityTranslationUnit[] = + "INSERT INTO entity_translation_unit\n" + " (id, entity_id, tu_id)\n" + " VALUES (?1, ?2, ?3);"; + +const char kSelectEntityTranslationUnits[] = + "SELECT entity_id, tu_id FROM entity_translation_unit ORDER BY id;"; + +const char kInsertReferenceTranslationUnit[] = + "INSERT INTO reference_translation_unit\n" + " (id, reference_id, tu_id)\n" + " VALUES (?1, ?2, ?3);"; + +const char kSelectReferenceTranslationUnits[] = + "SELECT reference_id, tu_id FROM reference_translation_unit ORDER BY id;"; + const char kFinalizeDb[] = "VACUUM;\n" "REINDEX;\n" @@ -163,8 +233,8 @@ bool InsertLocations(sqlite3* db, absl::Span locations) { bool InsertEntities(sqlite3* db, absl::Span entities) { // `substitute_entity_id` foreign key can refer to a yet-unadded entity. - if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr, - nullptr, nullptr) != SQLITE_OK) { + if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr, nullptr, + nullptr) != SQLITE_OK) { LOG(ERROR) << "sqlite disabling foreign keys failed: `" << sqlite3_errmsg(db) << "`"; return false; @@ -261,8 +331,8 @@ bool InsertEntities(sqlite3* db, absl::Span entities) { std::move(cleanup).Cancel(); sqlite3_finalize(insert_entity); - if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr, nullptr) - != SQLITE_OK) { + if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr, + nullptr) != SQLITE_OK) { LOG(ERROR) << "sqlite re-enabling foreign keys failed: `" << sqlite3_errmsg(db) << "`"; return false; @@ -320,7 +390,7 @@ bool InsertVirtualMethodLinks(sqlite3* db, return false; } - for (ReferenceId i = 0; i < links.size(); ++i) { + for (VirtualMethodLinkId i = 0; i < links.size(); ++i) { const VirtualMethodLink& link = links[i]; if (sqlite3_bind_int64(insert_link, 1, i) != SQLITE_OK || sqlite3_bind_int64(insert_link, 2, link.parent()) != SQLITE_OK || @@ -332,7 +402,7 @@ bool InsertVirtualMethodLinks(sqlite3* db, } if (sqlite3_step(insert_link) != SQLITE_DONE) { - LOG(ERROR) << "sqlite executing insert_reference failed: `" + LOG(ERROR) << "sqlite executing insert_link failed: `" << sqlite3_errmsg(db) << "`"; sqlite3_finalize(insert_link); return false; @@ -345,10 +415,411 @@ bool InsertVirtualMethodLinks(sqlite3* db, sqlite3_finalize(insert_link); return true; } + +bool InsertTranslationUnits( + sqlite3* db, absl::Span translation_units) { + sqlite3_stmt* insert_tu = nullptr; + if (sqlite3_prepare_v2(db, kInsertTranslationUnit, + sizeof(kInsertTranslationUnit), &insert_tu, + nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + for (TranslationUnitId i = 0; i < translation_units.size(); ++i) { + const TranslationUnit& tu = translation_units[i]; + if (sqlite3_bind_int64(insert_tu, 1, i) != SQLITE_OK || + sqlite3_bind_text(insert_tu, 2, tu.index_path().data(), + tu.index_path().size(), SQLITE_STATIC) != SQLITE_OK) { + LOG(ERROR) << "sqlite binding insert_tu failed: `" << sqlite3_errmsg(db) + << "`"; + sqlite3_finalize(insert_tu); + return false; + } + + if (sqlite3_step(insert_tu) != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing insert_tu failed: `" << sqlite3_errmsg(db) + << "`"; + sqlite3_finalize(insert_tu); + return false; + } + + sqlite3_reset(insert_tu); + sqlite3_clear_bindings(insert_tu); + } + + sqlite3_finalize(insert_tu); + return true; +} + +bool InsertEntityTranslationUnits( + sqlite3* db, + absl::Span entity_translation_units) { + sqlite3_stmt* insert_entity_tu = nullptr; + if (sqlite3_prepare_v2(db, kInsertEntityTranslationUnit, + sizeof(kInsertEntityTranslationUnit), + &insert_entity_tu, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + for (EntityTranslationUnitId i = 0; i < entity_translation_units.size(); + ++i) { + const EntityTranslationUnit& entity_tu = entity_translation_units[i]; + if (sqlite3_bind_int64(insert_entity_tu, 1, i) != SQLITE_OK || + sqlite3_bind_int64(insert_entity_tu, 2, entity_tu.entity_id()) != + SQLITE_OK || + sqlite3_bind_int64(insert_entity_tu, 3, entity_tu.tu_id()) != + SQLITE_OK) { + LOG(ERROR) << "sqlite binding insert_entity_tu failed: `" + << sqlite3_errmsg(db) << "`"; + sqlite3_finalize(insert_entity_tu); + return false; + } + + if (sqlite3_step(insert_entity_tu) != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing insert_entity_tu failed: `" + << sqlite3_errmsg(db) << "`"; + sqlite3_finalize(insert_entity_tu); + return false; + } + + sqlite3_reset(insert_entity_tu); + sqlite3_clear_bindings(insert_entity_tu); + } + + sqlite3_finalize(insert_entity_tu); + return true; +} + +bool InsertReferenceTranslationUnits( + sqlite3* db, + absl::Span reference_translation_units) { + sqlite3_stmt* insert_reference_tu = nullptr; + if (sqlite3_prepare_v2(db, kInsertReferenceTranslationUnit, + sizeof(kInsertReferenceTranslationUnit), + &insert_reference_tu, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + for (ReferenceTranslationUnitId i = 0; i < reference_translation_units.size(); + ++i) { + const ReferenceTranslationUnit& reference_tu = + reference_translation_units[i]; + if (sqlite3_bind_int64(insert_reference_tu, 1, i) != SQLITE_OK || + sqlite3_bind_int64(insert_reference_tu, 2, + reference_tu.reference_id()) != SQLITE_OK || + sqlite3_bind_int64(insert_reference_tu, 3, reference_tu.tu_id()) != + SQLITE_OK) { + LOG(ERROR) << "sqlite binding insert_reference_tu failed: `" + << sqlite3_errmsg(db) << "`"; + sqlite3_finalize(insert_reference_tu); + return false; + } + + if (sqlite3_step(insert_reference_tu) != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing insert_reference_tu failed: `" + << sqlite3_errmsg(db) << "`"; + sqlite3_finalize(insert_reference_tu); + return false; + } + + sqlite3_reset(insert_reference_tu); + sqlite3_clear_bindings(insert_reference_tu); + } + + sqlite3_finalize(insert_reference_tu); + return true; +} + +// Returns text column `column` of `stmt` as an optional string. Returns +// `nullopt` if the column value is `NULL`. +std::optional OptionalColumnText(sqlite3_stmt* stmt, int column) { + const char* text = + reinterpret_cast(sqlite3_column_text(stmt, column)); + if (text) { + return std::string(text); + } + return std::nullopt; +} + +// Returns text column `column` of `stmt` as a string. Returns an empty string +// if the column value is `NULL`. +std::string ColumnText(sqlite3_stmt* stmt, int column) { + return OptionalColumnText(stmt, column).value_or(std::string()); +} + +bool ReadLocations(sqlite3* db, std::vector& locations) { + locations.clear(); + + sqlite3_stmt* select_locations = nullptr; + if (sqlite3_prepare_v2(db, kSelectLocations, sizeof(kSelectLocations), + &select_locations, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_locations] { + sqlite3_finalize(select_locations); + }; + + int code; + while ((code = sqlite3_step(select_locations)) == SQLITE_ROW) { + std::string dirname = ColumnText(select_locations, 0); + std::string basename = ColumnText(select_locations, 1); + std::filesystem::path path = std::filesystem::path(dirname) / basename; + locations.emplace_back(path.string(), + sqlite3_column_int(select_locations, 2), + sqlite3_column_int(select_locations, 3)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_locations failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool ReadEntities(sqlite3* db, std::vector& entities) { + entities.clear(); + + sqlite3_stmt* select_entities = nullptr; + if (sqlite3_prepare_v2(db, kSelectEntities, sizeof(kSelectEntities), + &select_entities, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_entities] { + sqlite3_finalize(select_entities); + }; + + int code; + while ((code = sqlite3_step(select_entities)) == SQLITE_ROW) { + Entity::Kind kind = + static_cast(sqlite3_column_int(select_entities, 0)); + bool is_incomplete = sqlite3_column_int(select_entities, 1); + std::string name_prefix = ColumnText(select_entities, 2); + std::string name = ColumnText(select_entities, 3); + std::string name_suffix = ColumnText(select_entities, 4); + LocationId location_id = sqlite3_column_int64(select_entities, 5); + + std::optional substitute_relationship; + if (sqlite3_column_type(select_entities, 6) != SQLITE_NULL) { + EntityId substitute_entity_id = sqlite3_column_int64(select_entities, 6); + SubstituteRelationship::Kind substitute_relationship_kind = + static_cast( + sqlite3_column_int(select_entities, 7)); + substitute_relationship.emplace(substitute_relationship_kind, + substitute_entity_id); + } + + std::optional enum_value = + OptionalColumnText(select_entities, 8); + Entity::VirtualMethodKind virtual_method_kind = + static_cast( + sqlite3_column_int(select_entities, 9)); + + entities.emplace_back(kind, name_prefix, name, name_suffix, location_id, + is_incomplete, /*is_weak=*/false, + substitute_relationship, enum_value, + virtual_method_kind); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_entities failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool ReadReferences(sqlite3* db, std::vector& references) { + references.clear(); + + sqlite3_stmt* select_references = nullptr; + if (sqlite3_prepare_v2(db, kSelectReferences, sizeof(kSelectReferences), + &select_references, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_references] { + sqlite3_finalize(select_references); + }; + + int code; + while ((code = sqlite3_step(select_references)) == SQLITE_ROW) { + references.emplace_back(sqlite3_column_int64(select_references, 0), + sqlite3_column_int64(select_references, 1)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_references failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool ReadVirtualMethodLinks(sqlite3* db, + std::vector& links) { + links.clear(); + + sqlite3_stmt* select_links = nullptr; + if (sqlite3_prepare_v2(db, kSelectLinks, sizeof(kSelectLinks), &select_links, + nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_links] { sqlite3_finalize(select_links); }; + + int code; + while ((code = sqlite3_step(select_links)) == SQLITE_ROW) { + links.emplace_back(sqlite3_column_int64(select_links, 0), + sqlite3_column_int64(select_links, 1)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_links failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool ReadTranslationUnits(sqlite3* db, + std::vector& translation_units) { + translation_units.clear(); + + sqlite3_stmt* select_tus = nullptr; + if (sqlite3_prepare_v2(db, kSelectTranslationUnits, + sizeof(kSelectTranslationUnits), &select_tus, + nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_tus] { sqlite3_finalize(select_tus); }; + + int code; + while ((code = sqlite3_step(select_tus)) == SQLITE_ROW) { + translation_units.emplace_back(ColumnText(select_tus, 0)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_tus failed: `" << sqlite3_errmsg(db) + << "`"; + return false; + } + + return true; +} + +bool ReadEntityTranslationUnits( + sqlite3* db, std::vector& entity_translation_units) { + entity_translation_units.clear(); + + sqlite3_stmt* select_entity_tus = nullptr; + if (sqlite3_prepare_v2(db, kSelectEntityTranslationUnits, + sizeof(kSelectEntityTranslationUnits), + &select_entity_tus, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_entity_tus] { + sqlite3_finalize(select_entity_tus); + }; + + int code; + while ((code = sqlite3_step(select_entity_tus)) == SQLITE_ROW) { + entity_translation_units.emplace_back( + sqlite3_column_int64(select_entity_tus, 0), + sqlite3_column_int64(select_entity_tus, 1)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_entity_tus failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool ReadReferenceTranslationUnits( + sqlite3* db, + std::vector& reference_translation_units) { + reference_translation_units.clear(); + + sqlite3_stmt* select_reference_tus = nullptr; + if (sqlite3_prepare_v2(db, kSelectReferenceTranslationUnits, + sizeof(kSelectReferenceTranslationUnits), + &select_reference_tus, nullptr) != SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [select_reference_tus] { + sqlite3_finalize(select_reference_tus); + }; + + int code; + while ((code = sqlite3_step(select_reference_tus)) == SQLITE_ROW) { + reference_translation_units.emplace_back( + sqlite3_column_int64(select_reference_tus, 0), + sqlite3_column_int64(select_reference_tus, 1)); + } + + if (code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select_reference_tus failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + + return true; +} + +bool TableExists(sqlite3* db, const char* table_name) { + sqlite3_stmt* stmt = nullptr; + const char query[] = + "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1;"; + if (sqlite3_prepare_v2(db, query, sizeof(query), &stmt, nullptr) != + SQLITE_OK) { + LOG(ERROR) << "sqlite compiling prepared statement failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + absl::Cleanup cleanup = [stmt] { sqlite3_finalize(stmt); }; + if (sqlite3_bind_text(stmt, 1, table_name, -1, SQLITE_STATIC) != SQLITE_OK) { + LOG(ERROR) << "sqlite binding table_name failed: `" << sqlite3_errmsg(db) + << "`"; + return false; + } + + int code = sqlite3_step(stmt); + if (code != SQLITE_ROW && code != SQLITE_DONE) { + LOG(ERROR) << "sqlite executing select from `sqlite_master` failed: `" + << sqlite3_errmsg(db) << "`"; + return false; + } + return code == SQLITE_ROW; +} + } // anonymous namespace -bool SaveAsSqlite(const FlatIndex& index, const std::string& path) { - LOG(INFO) << "creating in-memory database"; +bool InitializeSqlite() { const size_t kSqliteMmapSize = 0x1000000000ull; if (sqlite3_config(SQLITE_CONFIG_SINGLETHREAD) != SQLITE_OK || sqlite3_config(SQLITE_CONFIG_MMAP_SIZE, kSqliteMmapSize, @@ -357,7 +828,71 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) { LOG(ERROR) << "sqlite setup failed"; return false; } + return true; +} +std::optional LoadFromSqlite(const std::string& path) { + sqlite3* db = nullptr; + if (sqlite3_open_v2(path.c_str(), &db, SQLITE_OPEN_READONLY, nullptr) != + SQLITE_OK) { + LOG(ERROR) << "sqlite open database failed: `" << sqlite3_errmsg(db) << "`"; + sqlite3_close(db); + return std::nullopt; + } + absl::Cleanup db_cleanup = [db] { sqlite3_close(db); }; + + FlatIndex index; + + LOG(INFO) << "reading locations"; + if (!ReadLocations(db, /*out*/ index.locations)) { + return std::nullopt; + } + + LOG(INFO) << "reading entities"; + if (!ReadEntities(db, /*out*/ index.entities)) { + return std::nullopt; + } + + LOG(INFO) << "reading references"; + if (!ReadReferences(db, /*out*/ index.references)) { + return std::nullopt; + } + + LOG(INFO) << "reading virtual method links"; + if (!ReadVirtualMethodLinks(db, /*out*/ index.virtual_method_links)) { + return std::nullopt; + } + + if (TableExists(db, "translation_unit")) { + LOG(INFO) << "reading translation units"; + index.incremental_indexing_metadata.emplace(); + if (!ReadTranslationUnits( + db, + /*out*/ index.incremental_indexing_metadata->translation_units)) { + return std::nullopt; + } + + LOG(INFO) << "reading entity - translation unit pairs"; + if (!ReadEntityTranslationUnits(db, + /*out*/ index.incremental_indexing_metadata + ->entity_translation_units)) { + return std::nullopt; + } + + LOG(INFO) << "reading reference - translation unit pairs"; + if (!ReadReferenceTranslationUnits( + db, + /*out*/ index.incremental_indexing_metadata + ->reference_translation_units)) { + return std::nullopt; + } + } + + return index; +} + +bool SaveAsSqlite(const FlatIndex& index, const std::string& path) { + LOG(INFO) << "creating in-memory database"; sqlite3* db = nullptr; char* error = nullptr; if (sqlite3_open(":memory:", &db) != SQLITE_OK || @@ -391,6 +926,39 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) { return false; } + if (index.incremental_indexing_metadata.has_value()) { + const IncrementalIndexingMetadata& metadata = + *index.incremental_indexing_metadata; + + LOG(INFO) << "creating incremental indexing support tables"; + if (sqlite3_exec(db, kCreateIncrementalIndexingSupportTables, nullptr, + nullptr, &error) != SQLITE_OK) { + LOG(ERROR) << "incremental indexing support table creation failed: `" + << error << "`"; + sqlite3_close(db); + return false; + } + + LOG(INFO) << "inserting translation units"; + if (!InsertTranslationUnits(db, metadata.translation_units)) { + sqlite3_close(db); + return false; + } + + LOG(INFO) << "inserting entity - translation unit pairs"; + if (!InsertEntityTranslationUnits(db, metadata.entity_translation_units)) { + sqlite3_close(db); + return false; + } + + LOG(INFO) << "inserting reference - translation unit pairs"; + if (!InsertReferenceTranslationUnits( + db, metadata.reference_translation_units)) { + sqlite3_close(db); + return false; + } + } + LOG(INFO) << "finalizing database"; if (sqlite3_exec(db, kFinalizeDb, nullptr, nullptr, &error) != SQLITE_OK) { LOG(ERROR) << "database finalization failed: `" << error << "`"; @@ -427,5 +995,6 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) { sqlite3_close(db); return backup_success; } + } // namespace indexer } // namespace oss_fuzz diff --git a/infra/indexer/index/sqlite.h b/infra/indexer/index/sqlite.h index 563075e111cd..ea7ad5d7c409 100644 --- a/infra/indexer/index/sqlite.h +++ b/infra/indexer/index/sqlite.h @@ -15,13 +15,18 @@ #ifndef OSS_FUZZ_INFRA_INDEXER_INDEX_SQLITE_H_ #define OSS_FUZZ_INFRA_INDEXER_INDEX_SQLITE_H_ +#include #include #include "indexer/index/types.h" namespace oss_fuzz { namespace indexer { + +bool InitializeSqlite(); +std::optional LoadFromSqlite(const std::string& path); bool SaveAsSqlite(const FlatIndex& index, const std::string& path); + } // namespace indexer } // namespace oss_fuzz diff --git a/infra/indexer/index/sqlite_unittest.cc b/infra/indexer/index/sqlite_unittest.cc new file mode 100644 index 000000000000..06d5d3eef351 --- /dev/null +++ b/infra/indexer/index/sqlite_unittest.cc @@ -0,0 +1,100 @@ +// Copyright 2025 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "indexer/index/sqlite.h" + +#include // NOLINT +#include +#include + +#include "indexer/index/types.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace oss_fuzz { +namespace indexer { +namespace { + +using ::testing::ElementsAreArray; + +class SqliteTest : public ::testing::Test { + protected: + static void SetUpTestSuite() { ASSERT_TRUE(InitializeSqlite()); } +}; + +TEST_F(SqliteTest, SaveAndLoad) { + FlatIndex index; + index.locations = { + Location("a/b.cc", 1, 2), + Location("c/d.h", 3, 4), + }; + index.entities = { + Entity(Entity::Kind::kEnumConstant, "", "kEnumValue", "", 1, false, false, + std::nullopt, "123"), + Entity(Entity::Kind::kClass, "foo::", "Bar", "", 0), + Entity(Entity::Kind::kFunction, "foo::", "Bar", "()", 1, false, false, + std::nullopt, std::nullopt, + Entity::VirtualMethodKind::kPureVirtual), + }; + index.references = { + Reference(/*entity_id=*/0, /*location_id=*/1), + Reference(/*entity_id=*/1, /*location_id=*/0), + }; + index.virtual_method_links = { + VirtualMethodLink(2, 2), + }; + index.incremental_indexing_metadata.emplace(); + index.incremental_indexing_metadata->translation_units = { + TranslationUnit("tu1"), + TranslationUnit("tu2"), + }; + index.incremental_indexing_metadata->entity_translation_units = { + EntityTranslationUnit(/*entity_id=*/0, /*tu_id=*/0), + EntityTranslationUnit(/*entity_id=*/1, /*tu_id=*/1), + EntityTranslationUnit(/*entity_id=*/2, /*tu_id=*/1), + }; + index.incremental_indexing_metadata->reference_translation_units = { + ReferenceTranslationUnit(/*reference_id=*/0, /*tu_id=*/1), + ReferenceTranslationUnit(/*reference_id=*/1, /*tu_id=*/0), + }; + + const std::string path = + (std::filesystem::path(::testing::TempDir()) / "test.sqlite").string(); + + ASSERT_TRUE(SaveAsSqlite(index, path)); + std::optional loaded_index = LoadFromSqlite(path); + ASSERT_TRUE(loaded_index.has_value()); + + EXPECT_THAT(loaded_index->locations, ElementsAreArray(index.locations)); + EXPECT_THAT(loaded_index->entities, ElementsAreArray(index.entities)); + EXPECT_THAT(loaded_index->references, ElementsAreArray(index.references)); + EXPECT_THAT(loaded_index->virtual_method_links, + ElementsAreArray(index.virtual_method_links)); + ASSERT_TRUE(loaded_index->incremental_indexing_metadata.has_value()); + EXPECT_THAT( + loaded_index->incremental_indexing_metadata->translation_units, + ElementsAreArray(index.incremental_indexing_metadata->translation_units)); + EXPECT_THAT( + loaded_index->incremental_indexing_metadata->entity_translation_units, + ElementsAreArray( + index.incremental_indexing_metadata->entity_translation_units)); + EXPECT_THAT( + loaded_index->incremental_indexing_metadata->reference_translation_units, + ElementsAreArray( + index.incremental_indexing_metadata->reference_translation_units)); +} + +} // namespace +} // namespace indexer +} // namespace oss_fuzz diff --git a/infra/indexer/index/types.h b/infra/indexer/index/types.h index 0af0067ca4ed..5481548f4731 100644 --- a/infra/indexer/index/types.h +++ b/infra/indexer/index/types.h @@ -40,8 +40,13 @@ using LocationId = uint64_t; using EntityId = uint64_t; using ReferenceId = uint64_t; using VirtualMethodLinkId = uint64_t; +using TranslationUnitId = uint64_t; +using EntityTranslationUnitId = uint64_t; +using ReferenceTranslationUnitId = uint64_t; constexpr LocationId kInvalidLocationId = 0xffffffffffffffffull; constexpr EntityId kInvalidEntityId = 0xffffffffffffffffull; +constexpr ReferenceId kInvalidReferenceId = 0xffffffffffffffffull; +constexpr TranslationUnitId kInvalidTranslationUnitId = 0xffffffffffffffffull; inline bool IsRealPath(absl::string_view path) { // Examples of built-in paths: `` and ``. @@ -180,6 +185,7 @@ class Entity { CHECK_EQ(substitute_relationship_.has_value(), new_substitute_entity_id.has_value()); if (substitute_relationship_.has_value()) { + CHECK_NE(*new_substitute_entity_id, kInvalidEntityId); substitute_relationship_->entity_id_ = *new_substitute_entity_id; } } @@ -343,6 +349,81 @@ H AbslHashValue(H h, const VirtualMethodLink& link) { return H::combine(std::move(h), link.parent(), link.child()); } +// Represents a single translation unit. +class TranslationUnit { + public: + explicit TranslationUnit(const std::string& index_path) + : index_path_(index_path) {} + + const std::string& index_path() const { return index_path_; } + + bool operator==(const TranslationUnit&) const = default; + std::strong_ordering operator<=>(const TranslationUnit&) const = default; + + private: + std::string index_path_; +}; + +template +H AbslHashValue(H h, const TranslationUnit& tu) { + return H::combine(std::move(h), tu.index_path()); +} + +// Links an entity to a translation unit it is encountered in (many-to-many). +class EntityTranslationUnit { + public: + EntityTranslationUnit(EntityId entity_id, TranslationUnitId tu_id) + : entity_id_(entity_id), tu_id_(tu_id) { + CHECK_NE(entity_id, kInvalidEntityId); + } + + EntityId entity_id() const { return entity_id_; } + TranslationUnitId tu_id() const { return tu_id_; } + + bool operator==(const EntityTranslationUnit&) const = default; + std::strong_ordering operator<=>(const EntityTranslationUnit&) const = + default; + + private: + EntityId entity_id_; + TranslationUnitId tu_id_; +}; + +template +H AbslHashValue(H h, const EntityTranslationUnit& etu) { + return H::combine(std::move(h), etu.entity_id(), etu.tu_id()); +} + +// Links a reference to a translation unit it is encountered in (many-to-many). +class ReferenceTranslationUnit { + public: + ReferenceTranslationUnit(ReferenceId reference_id, TranslationUnitId tu_id) + : reference_id_(reference_id), tu_id_(tu_id) {} + + ReferenceId reference_id() const { return reference_id_; } + TranslationUnitId tu_id() const { return tu_id_; } + + bool operator==(const ReferenceTranslationUnit&) const = default; + std::strong_ordering operator<=>(const ReferenceTranslationUnit&) const = + default; + + private: + ReferenceId reference_id_; + TranslationUnitId tu_id_; +}; + +template +H AbslHashValue(H h, const ReferenceTranslationUnit& etu) { + return H::combine(std::move(h), etu.reference_id(), etu.tu_id()); +} + +// A set of optional metadata for incremental indexing support. +struct IncrementalIndexingMetadata { + std::vector translation_units; + std::vector entity_translation_units; + std::vector reference_translation_units; +}; + // A simple holder for a sorted index, used as an interchange format/interface // definition between uses of the index. struct FlatIndex { @@ -350,6 +431,7 @@ struct FlatIndex { std::vector entities; std::vector references; std::vector virtual_method_links; + std::optional incremental_indexing_metadata; }; namespace testing_internal {