From 03bc3919e79b76a16277dc504f872d487bf8b1f2 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Fri, 6 Jan 2023 01:32:20 +0100 Subject: [PATCH 1/9] Improve s2region_term_indexer: * Add ability to reuse term buffer * Add option for optimize index size if query only points --- src/s2/s2region_term_indexer.cc | 85 +++++++++++++++++++++++---------- src/s2/s2region_term_indexer.h | 35 ++++++++++++-- 2 files changed, 91 insertions(+), 29 deletions(-) diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc index 5c73e66d..39b2c6e1 100644 --- a/src/s2/s2region_term_indexer.cc +++ b/src/s2/s2region_term_indexer.cc @@ -126,6 +126,14 @@ string S2RegionTermIndexer::GetTerm(TermType term_type, const S2CellId id, vector S2RegionTermIndexer::GetIndexTerms(const S2Point& point, string_view prefix) { + vector terms; + GetIndexTerms(point, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetIndexTerms(const S2Point& point, + string_view prefix, + vector* terms) { // See the top of this file for an overview of the indexing strategy. // // The last cell generated by this loop is effectively the covering for @@ -136,12 +144,13 @@ vector S2RegionTermIndexer::GetIndexTerms(const S2Point& point, // max_level() != true_max_level() (see S2RegionCoverer::Options). const S2CellId id(point); - vector terms; - for (int level = options_.min_level(); level <= options_.max_level(); - level += options_.level_mod()) { - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + int level = options_.min_level(); + if (options_.query_contains_points_only()) { + level = options_.true_max_level(); + } + for (; level <= options_.max_level(); level += options_.level_mod()) { + terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); } - return terms; } vector S2RegionTermIndexer::GetIndexTerms(const S2Region& region, @@ -154,6 +163,13 @@ vector S2RegionTermIndexer::GetIndexTerms(const S2Region& region, vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( const S2CellUnion& covering, string_view prefix) { + vector terms; + GetIndexTermsForCanonicalCovering(covering, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( + const S2CellUnion& covering, string_view prefix, vector* terms) { // See the top of this file for an overview of the indexing strategy. // // Cells in the covering are normally indexed as covering terms. If we are @@ -168,7 +184,6 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( *coverer_.mutable_options() = options_; S2_CHECK(coverer_.IsCanonical(covering)); } - vector terms; S2CellId prev_id = S2CellId::None(); int true_max_level = options_.true_max_level(); for (S2CellId id : covering) { @@ -178,14 +193,20 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); + // assume level <= options_.true_max_level() - if (level < true_max_level) { - // Add a covering term for this cell. - terms.push_back(GetTerm(TermType::COVERING, id, prefix)); - } - if (level == true_max_level || !options_.optimize_for_space()) { - // Add an ancestor term for this cell at the constrained level. - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + const bool is_max_level_cell = level == true_max_level; + // Add a term for this cell, max_level cell ANCESTOR is optimization + terms->push_back(GetTerm(is_max_level_cell ? TermType::ANCESTOR + : TermType::COVERING, + id, prefix)); + + // If query only contains points, there are no need other terms. + if (options_.query_contains_points_only()) continue; + + if (!options_.optimize_for_space() && !is_max_level_cell) { + // Add an ancestor term for this cell. + terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); } // Finally, add ancestor terms for all the ancestors of this cell. while ((level -= options_.level_mod()) >= options_.min_level()) { @@ -194,29 +215,34 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( prev_id.parent(level) == ancestor_id) { break; // We have already processed this cell and its ancestors. } - terms.push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix)); + terms->push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix)); } prev_id = id; } - return terms; } vector S2RegionTermIndexer::GetQueryTerms(const S2Point& point, string_view prefix) { + vector terms; + GetQueryTerms(point, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetQueryTerms(const S2Point& point, + string_view prefix, + vector* terms) { // See the top of this file for an overview of the indexing strategy. const S2CellId id(point); - vector terms; // Recall that all true_max_level() cells are indexed only as ancestor terms. int level = options_.true_max_level(); - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); - if (options_.index_contains_points_only()) return terms; + terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + if (options_.index_contains_points_only()) return; // Add covering terms for all the ancestor cells. for (; level >= options_.min_level(); level -= options_.level_mod()) { - terms.push_back(GetTerm(TermType::COVERING, id.parent(level), prefix)); + terms->push_back(GetTerm(TermType::COVERING, id.parent(level), prefix)); } - return terms; } vector S2RegionTermIndexer::GetQueryTerms(const S2Region& region, @@ -229,13 +255,20 @@ vector S2RegionTermIndexer::GetQueryTerms(const S2Region& region, vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( const S2CellUnion& covering, string_view prefix) { + vector terms; + GetQueryTermsForCanonicalCovering(covering, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( + const S2CellUnion& covering, string_view prefix, vector* terms) { // See the top of this file for an overview of the indexing strategy. + S2_CHECK(!options_.query_contains_points_only()); if (google::DEBUG_MODE) { *coverer_.mutable_options() = options_; S2_CHECK(coverer_.IsCanonical(covering)); } - vector terms; S2CellId prev_id = S2CellId::None(); int true_max_level = options_.true_max_level(); for (S2CellId id : covering) { @@ -245,9 +278,10 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); + // assume level <= options_.true_max_level() // Cells in the covering are always queried as ancestor terms. - terms.push_back(GetTerm(TermType::ANCESTOR, id, prefix)); + terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); // If the index only contains points, there are no covering terms. if (options_.index_contains_points_only()) continue; @@ -255,8 +289,8 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( // If we are optimizing for index space rather than query time, cells are // also queried as covering terms (except for true_max_level() cells, // which are indexed and queried as ancestor cells only). - if (options_.optimize_for_space() && level < true_max_level) { - terms.push_back(GetTerm(TermType::COVERING, id, prefix)); + if (options_.optimize_for_space() && level != true_max_level) { + terms->push_back(GetTerm(TermType::COVERING, id, prefix)); } // Finally, add covering terms for all the ancestors of this cell. while ((level -= options_.level_mod()) >= options_.min_level()) { @@ -265,9 +299,8 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( prev_id.parent(level) == ancestor_id) { break; // We have already processed this cell and its ancestors. } - terms.push_back(GetTerm(TermType::COVERING, ancestor_id, prefix)); + terms->push_back(GetTerm(TermType::COVERING, ancestor_id, prefix)); } prev_id = id; } - return terms; } diff --git a/src/s2/s2region_term_indexer.h b/src/s2/s2region_term_indexer.h index 44650799..17587c5d 100644 --- a/src/s2/s2region_term_indexer.h +++ b/src/s2/s2region_term_indexer.h @@ -196,8 +196,21 @@ class S2RegionTermIndexer { // this flag if your index consists entirely of points.) // // DEFAULT: false - bool index_contains_points_only() const { return points_only_; } - void set_index_contains_points_only(bool value) { points_only_ = value; } + bool index_contains_points_only() const { return index_points_only_; } + void set_index_contains_points_only(bool value) { index_points_only_ = value; } + + // If your query will only contain points (rather than regions), be sure + // to set this flag. This will generate smaller and faster index that + // are specialized for the points-only case. + // + // With the default quality settings, this flag reduces the number of + // index terms by about a factor of two. (The improvement gets smaller + // as max_cells() is increased, but there is really no reason not to use + // this flag if your query consist entirely of points.) + // + // DEFAULT: false + bool query_contains_points_only() const { return query_points_only_; } + void set_query_contains_points_only(bool value) { query_points_only_ = value; } // If true, the index will be optimized for space rather than for query // time. With the default quality settings, this flag reduces the number @@ -221,7 +234,8 @@ class S2RegionTermIndexer { void set_marker_character(char ch); private: - bool points_only_ = false; + bool index_points_only_ = false; + bool query_points_only_ = false; bool optimize_for_space_ = false; std::string marker_ = std::string(1, '$'); }; @@ -287,6 +301,21 @@ class S2RegionTermIndexer { std::vector GetQueryTermsForCanonicalCovering( const S2CellUnion& covering, absl::string_view prefix); + // Same as above but allows to reuse same buffer for different points or use + // single buffer for multiple points (common case is GeoJson MultiPoint) + void GetIndexTerms(const S2Point& point, absl::string_view prefix, + std::vector* terms); + void GetQueryTerms(const S2Point& point, absl::string_view prefix, + std::vector* terms); + + // Same as above but allows to reuse same buffer for different covering + void GetIndexTermsForCanonicalCovering(const S2CellUnion &covering, + absl::string_view prefix, + std::vector *terms); + void GetQueryTermsForCanonicalCovering(const S2CellUnion &covering, + absl::string_view prefix, + std::vector *terms); + private: enum TermType { ANCESTOR, COVERING }; From 0a2ff438e424f896d7ea92d3fc5333b2a6fe913f Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:55:33 +0100 Subject: [PATCH 2/9] Fix test --- src/s2/s2region_term_indexer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index bff52a00..eee812c2 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -84,7 +84,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // random size. S2Cap cap; vector terms; - if (query_type == QueryType::CAP) { + if (query_type == QueryType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetQueryTerms(cap.center(), ""); } else { From 0f9ef7a0ebe8d52ebf73cfb4c441cc1e3d553a05 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:55:40 +0100 Subject: [PATCH 3/9] Add tests --- src/s2/s2region_term_indexer_test.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index eee812c2..f5e0435b 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -122,6 +122,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) { options.set_max_level(16); options.set_max_cells(20); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { @@ -131,6 +134,8 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { options.set_max_level(16); options.set_max_cells(20); TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { @@ -140,6 +145,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { options.set_max_level(12); options.set_level_mod(3); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { @@ -149,6 +157,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { options.set_max_level(S2CellId::kMaxLevel); // Use leaf cells. options.set_max_cells(8); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { @@ -160,6 +171,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { options.set_max_cells(20); options.set_index_contains_points_only(true); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { @@ -168,6 +182,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { options.set_index_contains_points_only(true); // Use default parameter values. TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, MarkerCharacter) { From ad3508f36d888fb5f968a874e8a1910236c69c3a Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:38:00 +0100 Subject: [PATCH 4/9] Make test complete --- src/s2/s2region_term_indexer_test.cc | 98 +++++++++++++--------------- 1 file changed, 44 insertions(+), 54 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index f5e0435b..e29f7952 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -112,79 +112,69 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, static_cast(query_terms) / absl::GetFlag(FLAGS_iters)); } -// We run one test case for each combination of space vs. time optimization, -// and indexing regions vs. only points. +using TestCase = std::tuple; + +class S2RegionTermIndexerTest : public testing::TestWithParam { +protected: + void SetUp() override { + query_type = std::get<0>(GetParam()); + options.set_optimize_for_space(std::get<1>(GetParam())); + options.set_index_contains_points_only(std::get<2>(GetParam())); + options.set_query_contains_points_only(std::get<3>(GetParam())); + if (query_type != QueryType::POINT && + options.query_contains_points_only()) { + GTEST_SKIP() << "Case query_type != QueryType::POINT && " + "options.query_contains_points_only() is invalid."; + } + } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) { S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. - options.set_max_level(16); - options.set_max_cells(20); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + QueryType query_type{}; +}; + +// We run one test case for each combination: +// QueryType: POINT, CAP +// optimize_for_space: false, true +// index_contains_points_only: false, true +// query_contains_points_only: false, true +// Case QueryType != Cap and query_contains_points_only == true is invalid, +// so we skip it +INSTANTIATE_TEST_CASE_P( + S2RegionTermIndexerTests, S2RegionTermIndexerTest, + testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP), + testing::Bool(), testing::Bool(), testing::Bool())); + +TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. +TEST_P(S2RegionTermIndexerTest, UseFaceCells) { + options.set_min_level(0); options.set_max_level(16); options.set_max_cells(20); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(6); // Constrain min/max levels. +TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) { + options.set_min_level(6); options.set_max_level(12); options.set_level_mod(3); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(true); // Optimize for space. +TEST_P(S2RegionTermIndexerTest, UseLeafCells) { options.set_min_level(4); - options.set_max_level(S2CellId::kMaxLevel); // Use leaf cells. + options.set_max_level(S2CellId::kMaxLevel); options.set_max_cells(8); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. +TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { + options.set_min_level(0); options.set_max_level(S2CellId::kMaxLevel); options.set_level_mod(2); options.set_max_cells(20); - options.set_index_contains_points_only(true); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); -} - -TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(true); // Optimize for space. - options.set_index_contains_points_only(true); - // Use default parameter values. - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } TEST(S2RegionTermIndexer, MarkerCharacter) { From 9d415367fda76e5f87fe437f3a893d573aa64d43 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:52:30 +0100 Subject: [PATCH 5/9] Improve doc --- src/s2/s2region_term_indexer.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc index 39b2c6e1..8deee62e 100644 --- a/src/s2/s2region_term_indexer.cc +++ b/src/s2/s2region_term_indexer.cc @@ -61,6 +61,12 @@ // never be any document regions larger than the query region. This can // significantly reduce the size of queries. // +// + If the query will contain only points (rather than general regions), then +// we can skip all the ancestor terms mentioned above (except last cell see +// `GetIndexTerms(const S2Point& point...` for details) because there will +// never be any document regions larger than the index region. This can +// significantly reduce the size of index. +// // + If it is more important to optimize index size rather than query speed, // the number of index terms can be reduced by creating ancestor terms only // for the *proper* ancestors of the cells in a document region, and @@ -193,7 +199,7 @@ void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); - // assume level <= options_.true_max_level() + S2_DCHECK_LE(level, options_.true_max_level()); const bool is_max_level_cell = level == true_max_level; // Add a term for this cell, max_level cell ANCESTOR is optimization @@ -278,7 +284,7 @@ void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); - // assume level <= options_.true_max_level() + S2_DCHECK_LE(level, options_.true_max_level()); // Cells in the covering are always queried as ancestor terms. terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); From 0662ad18a91e937b90b2818332552faeb1209ef6 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:05:09 +0100 Subject: [PATCH 6/9] Make better coverage --- src/s2/s2region_term_indexer_test.cc | 44 ++++++++++++++++------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index e29f7952..e1e2cf0a 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -44,10 +44,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing"); namespace { -enum QueryType { POINT, CAP }; +enum DataType { POINT, CAP }; void TestRandomCaps(const S2RegionTermIndexer::Options& options, - QueryType query_type) { + DataType index_type, DataType query_type) { // This function creates an index consisting either of points (if // options.index_contains_points_only() is true) or S2Caps of random size. // It then executes queries consisting of points (if query_type == POINT) @@ -63,7 +63,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // of random size (up to a full sphere). S2Cap cap; vector terms; - if (options.index_contains_points_only()) { + if (index_type == DataType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetIndexTerms(cap.center(), ""); } else { @@ -84,7 +84,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // random size. S2Cap cap; vector terms; - if (query_type == QueryType::POINT) { + if (query_type == DataType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetQueryTerms(cap.center(), ""); } else { @@ -112,24 +112,29 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, static_cast(query_terms) / absl::GetFlag(FLAGS_iters)); } -using TestCase = std::tuple; +using TestCase = std::tuple; class S2RegionTermIndexerTest : public testing::TestWithParam { protected: void SetUp() override { - query_type = std::get<0>(GetParam()); - options.set_optimize_for_space(std::get<1>(GetParam())); - options.set_index_contains_points_only(std::get<2>(GetParam())); - options.set_query_contains_points_only(std::get<3>(GetParam())); - if (query_type != QueryType::POINT && - options.query_contains_points_only()) { - GTEST_SKIP() << "Case query_type != QueryType::POINT && " + index_type = std::get<0>(GetParam()); + query_type = std::get<1>(GetParam()); + options.set_optimize_for_space(std::get<2>(GetParam())); + options.set_index_contains_points_only(std::get<3>(GetParam())); + options.set_query_contains_points_only(std::get<4>(GetParam())); + if (index_type != DataType::POINT && options.index_contains_points_only()) { + GTEST_SKIP() << "Case index_type != DataType::POINT && " + "options.index_contains_points_only() is invalid."; + } + if (query_type != DataType::POINT && options.query_contains_points_only()) { + GTEST_SKIP() << "Case query_type != DataType::POINT && " "options.query_contains_points_only() is invalid."; } } S2RegionTermIndexer::Options options; - QueryType query_type{}; + DataType index_type{}; + DataType query_type{}; }; // We run one test case for each combination: @@ -141,32 +146,33 @@ class S2RegionTermIndexerTest : public testing::TestWithParam { // so we skip it INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, - testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP), + testing::Combine(testing::Values(DataType::POINT, DataType::CAP), + testing::Values(DataType::POINT, DataType::CAP), testing::Bool(), testing::Bool(), testing::Bool())); TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseFaceCells) { options.set_min_level(0); options.set_max_level(16); options.set_max_cells(20); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) { options.set_min_level(6); options.set_max_level(12); options.set_level_mod(3); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseLeafCells) { options.set_min_level(4); options.set_max_level(S2CellId::kMaxLevel); options.set_max_cells(8); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { @@ -174,7 +180,7 @@ TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { options.set_max_level(S2CellId::kMaxLevel); options.set_level_mod(2); options.set_max_cells(20); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST(S2RegionTermIndexer, MarkerCharacter) { From a82070ca469e763609b027441515cce9cb949cf6 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:11:19 +0100 Subject: [PATCH 7/9] Fix comment --- src/s2/s2region_term_indexer_test.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index e1e2cf0a..0d09bcdd 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -138,12 +138,11 @@ class S2RegionTermIndexerTest : public testing::TestWithParam { }; // We run one test case for each combination: -// QueryType: POINT, CAP +// index_type: POINT, CAP +// query_type: POINT, CAP // optimize_for_space: false, true // index_contains_points_only: false, true // query_contains_points_only: false, true -// Case QueryType != Cap and query_contains_points_only == true is invalid, -// so we skip it INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, testing::Combine(testing::Values(DataType::POINT, DataType::CAP), From f185b9259d66d6152356ac4b34cf86b5f7ec5a4a Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:22:53 +0100 Subject: [PATCH 8/9] Make better test case names --- src/s2/s2region_term_indexer_test.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 0d09bcdd..65cd6f6b 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -27,6 +27,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/flags/flag.h" +#include "absl/strings/str_cat.h" #include "s2/base/commandlineflags.h" #include "s2/base/logging.h" @@ -44,7 +45,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing"); namespace { -enum DataType { POINT, CAP }; +enum DataType { + POINT = 0, + CAP = 1, +}; void TestRandomCaps(const S2RegionTermIndexer::Options& options, DataType index_type, DataType query_type) { @@ -147,7 +151,13 @@ INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, testing::Combine(testing::Values(DataType::POINT, DataType::CAP), testing::Values(DataType::POINT, DataType::CAP), - testing::Bool(), testing::Bool(), testing::Bool())); + testing::Bool(), testing::Bool(), testing::Bool()), + [](const testing::TestParamInfo& info) { + return absl::StrCat( + "Index", std::get<0>(info.param), "Query", std::get<1>(info.param), + "SpaceOpt", std::get<2>(info.param), "IndexOpt", + std::get<3>(info.param), "QueryOpt", std::get<4>(info.param)); + }); TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { TestRandomCaps(options, index_type, query_type); From eeec3ea4f1004d0c2f0fe5f511cc99331092664c Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Wed, 11 Jan 2023 10:07:54 +0100 Subject: [PATCH 9/9] Benchmark --- CMakeLists.txt | 37 ++++++------ src/s2/s2region_term_indexer_benchmark.cpp | 67 ++++++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) create mode 100644 src/s2/s2region_term_indexer_benchmark.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4044cd90..cad8ef6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ endif() # end up defined differently. There is probably a better way to achieve # this than assuming what absl used. # Using CACHE allows the user to override the default. -set(CMAKE_CXX_STANDARD 11 CACHE STRING "The C++ standard to build with") +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # No compiler-specific extensions, i.e. -std=c++11, not -std=gnu++11. set(CMAKE_CXX_EXTENSIONS OFF) @@ -73,6 +73,7 @@ endif() # add_subdirectory(s2-submodule) if (NOT TARGET absl::base) find_package(absl REQUIRED) + find_package(GTest) endif() find_package(OpenSSL REQUIRED) # pthreads isn't used directly, but this is still required for std::thread. @@ -216,13 +217,11 @@ add_library(s2 src/s2/util/math/mathutil.cc src/s2/util/units/length-units.cc) -if (GTEST_ROOT) add_library(s2testing STATIC src/s2/s2builderutil_testing.cc src/s2/s2shapeutil_testing.cc src/s2/s2testing.cc src/s2/thread_testing.cc) -endif() target_link_libraries( s2 @@ -248,13 +247,11 @@ target_link_libraries( absl::utility ${CMAKE_THREAD_LIBS_INIT}) -if (GTEST_ROOT) target_link_libraries( s2testing ${GFLAGS_LIBRARIES} ${GLOG_LIBRARIES} absl::memory absl::strings) -endif() # Allow other CMake projects to use this one with: # list(APPEND CMAKE_MODULE_PATH "/third_party/cmake") @@ -428,22 +425,13 @@ install(FILES src/s2/util/units/length-units.h src/s2/util/units/physical-units.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/s2/util/units") -if (GTEST_ROOT) set(S2_TARGETS s2 s2testing) -else() - set(S2_TARGETS s2) -endif() install(TARGETS ${S2_TARGETS} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") -message("GTEST_ROOT: ${GTEST_ROOT}") -if (GTEST_ROOT) - add_subdirectory(${GTEST_ROOT} build_gtest) - include_directories(${GTEST_ROOT}/include) - set(S2TestFiles src/s2/encoded_s2cell_id_vector_test.cc src/s2/encoded_s2point_vector_test.cc @@ -570,10 +558,9 @@ if (GTEST_ROOT) absl::span absl::strings absl::synchronization - gtest_main) + GTest::gtest_main) add_test(${test} ${test}) endforeach() -endif() if (BUILD_EXAMPLES AND TARGET s2testing) add_subdirectory("doc/examples" examples) @@ -582,3 +569,21 @@ endif() if (${SWIG_FOUND} AND ${Python3_FOUND}) add_subdirectory("src/python" python) endif() + +find_package(benchmark) + +add_executable(s2region_term_indexer_benchmark src/s2/s2region_term_indexer_benchmark.cpp) +target_link_libraries( + s2region_term_indexer_benchmark + PUBLIC + s2testing s2 + absl::base + absl::btree + absl::core_headers + absl::flags_reflection + absl::memory + absl::span + absl::strings + absl::synchronization + benchmark::benchmark + benchmark::benchmark_main) diff --git a/src/s2/s2region_term_indexer_benchmark.cpp b/src/s2/s2region_term_indexer_benchmark.cpp new file mode 100644 index 00000000..98f602a8 --- /dev/null +++ b/src/s2/s2region_term_indexer_benchmark.cpp @@ -0,0 +1,67 @@ +#include "s2region_term_indexer.h" +#include "s2testing.h" +#include + +static void BM_OldApiGetIndexTerms(benchmark::State &state) { + S2RegionTermIndexer indexer; + benchmark::DoNotOptimize(indexer); + auto const count = state.range(0); + for (auto _ : state) { + for (int64_t i = 0; i < count; ++i) { + auto value = indexer.GetIndexTerms(S2Testing::RandomPoint(), {}); + benchmark::DoNotOptimize(value); + } + } +} + +BENCHMARK(BM_OldApiGetIndexTerms)->RangeMultiplier(4)->Range(1, 65536); + +static void BM_NewApiGetIndexTerms(benchmark::State &state) { + S2RegionTermIndexer indexer; + benchmark::DoNotOptimize(indexer); + auto const count = state.range(0); + for (auto _ : state) { + std::vector terms; + benchmark::DoNotOptimize(terms); + for (int64_t i = 0; i < count; ++i) { + terms.clear(); + indexer.GetIndexTerms(S2Testing::RandomPoint(), {}, &terms); + benchmark::DoNotOptimize(terms); + } + } +} + +BENCHMARK(BM_NewApiGetIndexTerms)->RangeMultiplier(4)->Range(1, 65536); + +static void BM_OldApiGetQueryTerms(benchmark::State &state) { + S2RegionTermIndexer indexer; + benchmark::DoNotOptimize(indexer); + auto const count = state.range(0); + for (auto _ : state) { + for (int64_t i = 0; i < count; ++i) { + auto value = indexer.GetQueryTerms(S2Testing::RandomPoint(), {}); + benchmark::DoNotOptimize(value); + } + } +} + +BENCHMARK(BM_OldApiGetQueryTerms)->RangeMultiplier(4)->Range(1, 65536); + +static void BM_NewApiGetQueryTerms(benchmark::State &state) { + S2RegionTermIndexer indexer; + benchmark::DoNotOptimize(indexer); + auto const count = state.range(0); + for (auto _ : state) { + std::vector terms; + benchmark::DoNotOptimize(terms); + for (int64_t i = 0; i < count; ++i) { + terms.clear(); + indexer.GetQueryTerms(S2Testing::RandomPoint(), {}, &terms); + benchmark::DoNotOptimize(terms); + } + } +} + +BENCHMARK(BM_NewApiGetQueryTerms)->RangeMultiplier(4)->Range(1, 65536); + +BENCHMARK_MAIN(); \ No newline at end of file