From 03bc3919e79b76a16277dc504f872d487bf8b1f2 Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Fri, 6 Jan 2023 01:32:20 +0100
Subject: [PATCH 1/9] Improve s2region_term_indexer: * Add ability to reuse
 term buffer * Add option for optimize index size if query only points

---
 src/s2/s2region_term_indexer.cc | 85 +++++++++++++++++++++++----------
 src/s2/s2region_term_indexer.h  | 35 ++++++++++++--
 2 files changed, 91 insertions(+), 29 deletions(-)
diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc
index 5c73e66d..39b2c6e1 100644
--- a/src/s2/s2region_term_indexer.cc
+++ b/src/s2/s2region_term_indexer.cc
@@ -126,6 +126,14 @@ string S2RegionTermIndexer::GetTerm(TermType term_type, const S2CellId id,
 
 vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
                                                   string_view prefix) {
+  vector<string> terms;
+  GetIndexTerms(point, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
+                                        string_view prefix,
+                                        vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
   //
   // The last cell generated by this loop is effectively the covering for
@@ -136,12 +144,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
   // max_level() != true_max_level() (see S2RegionCoverer::Options).
 
   const S2CellId id(point);
-  vector<string> terms;
-  for (int level = options_.min_level(); level <= options_.max_level();
-       level += options_.level_mod()) {
-    terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+  int level = options_.min_level();
+  if (options_.query_contains_points_only()) {
+    level = options_.true_max_level();
+  }
+  for (; level <= options_.max_level(); level += options_.level_mod()) {
+    terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
@@ -154,6 +163,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
 
 vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     const S2CellUnion& covering, string_view prefix) {
+  vector<string> terms;
+  GetIndexTermsForCanonicalCovering(covering, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
+    const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
   //
   // Cells in the covering are normally indexed as covering terms.  If we are
@@ -168,7 +184,6 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     *coverer_.mutable_options() = options_;
     S2_CHECK(coverer_.IsCanonical(covering));
   }
-  vector<string> terms;
   S2CellId prev_id = S2CellId::None();
   int true_max_level = options_.true_max_level();
   for (S2CellId id : covering) {
@@ -178,14 +193,20 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
+    // assume level <= options_.true_max_level()
 
-    if (level < true_max_level) {
-      // Add a covering term for this cell.
-      terms.push_back(GetTerm(TermType::COVERING, id, prefix));
-    }
-    if (level == true_max_level || !options_.optimize_for_space()) {
-      // Add an ancestor term for this cell at the constrained level.
-      terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+    const bool is_max_level_cell = level == true_max_level;
+    // Add a term for this cell, max_level cell ANCESTOR is optimization
+    terms->push_back(GetTerm(is_max_level_cell ? TermType::ANCESTOR
+                                               : TermType::COVERING,
+                             id, prefix));
+
+    // If query only contains points, there are no need other terms.
+    if (options_.query_contains_points_only()) continue;
+
+    if (!options_.optimize_for_space() && !is_max_level_cell) {
+      // Add an ancestor term for this cell.
+      terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
     }
     // Finally, add ancestor terms for all the ancestors of this cell.
     while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -194,29 +215,34 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
           prev_id.parent(level) == ancestor_id) {
         break;  // We have already processed this cell and its ancestors.
       }
-      terms.push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
+      terms->push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
     }
     prev_id = id;
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
                                                   string_view prefix) {
+  vector<string> terms;
+  GetQueryTerms(point, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
+                                        string_view prefix,
+                                        vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
 
   const S2CellId id(point);
-  vector<string> terms;
   // Recall that all true_max_level() cells are indexed only as ancestor terms.
   int level = options_.true_max_level();
-  terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
-  if (options_.index_contains_points_only()) return terms;
+  terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+  if (options_.index_contains_points_only()) return;
 
   // Add covering terms for all the ancestor cells.
   for (; level >= options_.min_level(); level -= options_.level_mod()) {
-    terms.push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
+    terms->push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
@@ -229,13 +255,20 @@ vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
 
 vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     const S2CellUnion& covering, string_view prefix) {
+  vector<string> terms;
+  GetQueryTermsForCanonicalCovering(covering, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
+    const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
 
+  S2_CHECK(!options_.query_contains_points_only());
   if (google::DEBUG_MODE) {
     *coverer_.mutable_options() = options_;
     S2_CHECK(coverer_.IsCanonical(covering));
   }
-  vector<string> terms;
   S2CellId prev_id = S2CellId::None();
   int true_max_level = options_.true_max_level();
   for (S2CellId id : covering) {
@@ -245,9 +278,10 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
+    // assume level <= options_.true_max_level()
 
     // Cells in the covering are always queried as ancestor terms.
-    terms.push_back(GetTerm(TermType::ANCESTOR, id, prefix));
+    terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
 
     // If the index only contains points, there are no covering terms.
     if (options_.index_contains_points_only()) continue;
@@ -255,8 +289,8 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     // If we are optimizing for index space rather than query time, cells are
     // also queried as covering terms (except for true_max_level() cells,
     // which are indexed and queried as ancestor cells only).
-    if (options_.optimize_for_space() && level < true_max_level) {
-      terms.push_back(GetTerm(TermType::COVERING, id, prefix));
+    if (options_.optimize_for_space() && level != true_max_level) {
+      terms->push_back(GetTerm(TermType::COVERING, id, prefix));
     }
     // Finally, add covering terms for all the ancestors of this cell.
     while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -265,9 +299,8 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
           prev_id.parent(level) == ancestor_id) {
         break;  // We have already processed this cell and its ancestors.
       }
-      terms.push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
+      terms->push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
     }
     prev_id = id;
   }
-  return terms;
 }
diff --git a/src/s2/s2region_term_indexer.h b/src/s2/s2region_term_indexer.h
index 44650799..17587c5d 100644
--- a/src/s2/s2region_term_indexer.h
+++ b/src/s2/s2region_term_indexer.h
@@ -196,8 +196,21 @@ class S2RegionTermIndexer {
     // this flag if your index consists entirely of points.)
     //
     // DEFAULT: false
-    bool index_contains_points_only() const { return points_only_; }
-    void set_index_contains_points_only(bool value) { points_only_ = value; }
+    bool index_contains_points_only() const { return index_points_only_; }
+    void set_index_contains_points_only(bool value) { index_points_only_ = value; }
+
+    // If your query will only contain points (rather than regions), be sure
+    // to set this flag.  This will generate smaller and faster index that
+    // are specialized for the points-only case.
+    //
+    // With the default quality settings, this flag reduces the number of
+    // index terms by about a factor of two.  (The improvement gets smaller
+    // as max_cells() is increased, but there is really no reason not to use
+    // this flag if your query consist entirely of points.)
+    //
+    // DEFAULT: false
+    bool query_contains_points_only() const { return query_points_only_; }
+    void set_query_contains_points_only(bool value) { query_points_only_ = value; }
 
     // If true, the index will be optimized for space rather than for query
     // time.  With the default quality settings, this flag reduces the number
@@ -221,7 +234,8 @@ class S2RegionTermIndexer {
     void set_marker_character(char ch);
 
    private:
-    bool points_only_ = false;
+    bool index_points_only_ = false;
+    bool query_points_only_ = false;
     bool optimize_for_space_ = false;
     std::string marker_ = std::string(1, '$');
   };
@@ -287,6 +301,21 @@ class S2RegionTermIndexer {
   std::vector<std::string> GetQueryTermsForCanonicalCovering(
       const S2CellUnion& covering, absl::string_view prefix);
 
+  // Same as above but allows to reuse same buffer for different points or use
+  // single buffer for multiple points (common case is GeoJson MultiPoint)
+  void GetIndexTerms(const S2Point& point, absl::string_view prefix,
+                     std::vector<std::string>* terms);
+  void GetQueryTerms(const S2Point& point, absl::string_view prefix,
+                     std::vector<std::string>* terms);
+
+  // Same as above but allows to reuse same buffer for different covering
+  void GetIndexTermsForCanonicalCovering(const S2CellUnion &covering,
+                                         absl::string_view prefix,
+                                         std::vector<std::string> *terms);
+  void GetQueryTermsForCanonicalCovering(const S2CellUnion &covering,
+                                         absl::string_view prefix,
+                                         std::vector<std::string> *terms);
+
  private:
   enum TermType { ANCESTOR, COVERING };
 

From 0a2ff438e424f896d7ea92d3fc5333b2a6fe913f Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Mon, 9 Jan 2023 17:55:33 +0100
Subject: [PATCH 2/9] Fix test

---
 src/s2/s2region_term_indexer_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index bff52a00..eee812c2 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -84,7 +84,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options,
     // random size.
     S2Cap cap;
     vector<string> terms;
-    if (query_type == QueryType::CAP) {
+    if (query_type == QueryType::POINT) {
       cap = S2Cap::FromPoint(S2Testing::RandomPoint());
       terms = indexer.GetQueryTerms(cap.center(), "");
     } else {

From 0f9ef7a0ebe8d52ebf73cfb4c441cc1e3d553a05 Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Mon, 9 Jan 2023 17:55:40 +0100
Subject: [PATCH 3/9] Add tests

---
 src/s2/s2region_term_indexer_test.cc | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index eee812c2..f5e0435b 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -122,6 +122,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) {
   options.set_max_level(16);
   options.set_max_cells(20);
   TestRandomCaps(options, QueryType::CAP);
+  TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) {
@@ -131,6 +134,8 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) {
   options.set_max_level(16);
   options.set_max_cells(20);
   TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) {
@@ -140,6 +145,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) {
   options.set_max_level(12);
   options.set_level_mod(3);
   TestRandomCaps(options, QueryType::CAP);
+  TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) {
@@ -149,6 +157,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) {
   options.set_max_level(S2CellId::kMaxLevel);  // Use leaf cells.
   options.set_max_cells(8);
   TestRandomCaps(options, QueryType::CAP);
+  TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) {
@@ -160,6 +171,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) {
   options.set_max_cells(20);
   options.set_index_contains_points_only(true);
   TestRandomCaps(options, QueryType::CAP);
+  TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) {
@@ -168,6 +182,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) {
   options.set_index_contains_points_only(true);
   // Use default parameter values.
   TestRandomCaps(options, QueryType::CAP);
+  TestRandomCaps(options, QueryType::POINT);
+  options.set_query_contains_points_only(true);
+  TestRandomCaps(options, QueryType::POINT);
 }
 
 TEST(S2RegionTermIndexer, MarkerCharacter) {

From ad3508f36d888fb5f968a874e8a1910236c69c3a Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Tue, 10 Jan 2023 21:38:00 +0100
Subject: [PATCH 4/9] Make test complete

---
 src/s2/s2region_term_indexer_test.cc | 98 +++++++++++++---------------
 1 file changed, 44 insertions(+), 54 deletions(-)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index f5e0435b..e29f7952 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -112,79 +112,69 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options,
          static_cast<double>(query_terms) / absl::GetFlag(FLAGS_iters));
 }
 
-// We run one test case for each combination of space vs. time optimization,
-// and indexing regions vs. only points.
+using TestCase = std::tuple<QueryType, bool, bool, bool>;
+
+class S2RegionTermIndexerTest : public testing::TestWithParam<TestCase> {
+protected:
+  void SetUp() override {
+    query_type = std::get<0>(GetParam());
+    options.set_optimize_for_space(std::get<1>(GetParam()));
+    options.set_index_contains_points_only(std::get<2>(GetParam()));
+    options.set_query_contains_points_only(std::get<3>(GetParam()));
+    if (query_type != QueryType::POINT &&
+        options.query_contains_points_only()) {
+      GTEST_SKIP() << "Case query_type != QueryType::POINT && "
+                      "options.query_contains_points_only() is invalid.";
+    }
+  }
 
-TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) {
   S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(false);       // Optimize for time.
-  options.set_min_level(0);                    // Use face cells.
-  options.set_max_level(16);
-  options.set_max_cells(20);
-  TestRandomCaps(options, QueryType::CAP);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
+  QueryType query_type{};
+};
+
+// We run one test case for each combination:
+// QueryType: POINT, CAP
+// optimize_for_space: false, true
+// index_contains_points_only: false, true
+// query_contains_points_only: false, true
+// Case QueryType != Cap and query_contains_points_only == true is invalid,
+// so we skip it
+INSTANTIATE_TEST_CASE_P(
+    S2RegionTermIndexerTests, S2RegionTermIndexerTest,
+    testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP),
+                     testing::Bool(), testing::Bool(), testing::Bool()));
+
+TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) {
+  TestRandomCaps(options, query_type);
 }
 
-TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) {
-  S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(false);       // Optimize for time.
-  options.set_min_level(0);                    // Use face cells.
+TEST_P(S2RegionTermIndexerTest, UseFaceCells) {
+  options.set_min_level(0);
   options.set_max_level(16);
   options.set_max_cells(20);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
+  TestRandomCaps(options, query_type);
 }
 
-TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) {
-  S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(false);       // Optimize for time.
-  options.set_min_level(6);                    // Constrain min/max levels.
+TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) {
+  options.set_min_level(6);
   options.set_max_level(12);
   options.set_level_mod(3);
-  TestRandomCaps(options, QueryType::CAP);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
+  TestRandomCaps(options, query_type);
 }
 
-TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) {
-  S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(true);        // Optimize for space.
+TEST_P(S2RegionTermIndexerTest, UseLeafCells) {
   options.set_min_level(4);
-  options.set_max_level(S2CellId::kMaxLevel);  // Use leaf cells.
+  options.set_max_level(S2CellId::kMaxLevel);
   options.set_max_cells(8);
-  TestRandomCaps(options, QueryType::CAP);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
+  TestRandomCaps(options, query_type);
 }
 
-TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) {
-  S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(false);       // Optimize for time.
-  options.set_min_level(0);                    // Use face cells.
+TEST_P(S2RegionTermIndexerTest, UseFaceCells2) {
+  options.set_min_level(0);
   options.set_max_level(S2CellId::kMaxLevel);
   options.set_level_mod(2);
   options.set_max_cells(20);
-  options.set_index_contains_points_only(true);
-  TestRandomCaps(options, QueryType::CAP);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
-}
-
-TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) {
-  S2RegionTermIndexer::Options options;
-  options.set_optimize_for_space(true);        // Optimize for space.
-  options.set_index_contains_points_only(true);
-  // Use default parameter values.
-  TestRandomCaps(options, QueryType::CAP);
-  TestRandomCaps(options, QueryType::POINT);
-  options.set_query_contains_points_only(true);
-  TestRandomCaps(options, QueryType::POINT);
+  TestRandomCaps(options, query_type);
 }
 
 TEST(S2RegionTermIndexer, MarkerCharacter) {

From 9d415367fda76e5f87fe437f3a893d573aa64d43 Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Tue, 10 Jan 2023 21:52:30 +0100
Subject: [PATCH 5/9] Improve doc

---
 src/s2/s2region_term_indexer.cc | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc
index 39b2c6e1..8deee62e 100644
--- a/src/s2/s2region_term_indexer.cc
+++ b/src/s2/s2region_term_indexer.cc
@@ -61,6 +61,12 @@
 //   never be any document regions larger than the query region.  This can
 //   significantly reduce the size of queries.
 //
+// + If the query will contain only points (rather than general regions), then
+//   we can skip all the ancestor terms mentioned above (except last cell see
+//   `GetIndexTerms(const S2Point& point...` for details) because there will
+//   never be any document regions larger than the index region.  This can
+//   significantly reduce the size of index.
+//
 // + If it is more important to optimize index size rather than query speed,
 //   the number of index terms can be reduced by creating ancestor terms only
 //   for the *proper* ancestors of the cells in a document region, and
@@ -193,7 +199,7 @@ void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
-    // assume level <= options_.true_max_level()
+    S2_DCHECK_LE(level, options_.true_max_level());
 
     const bool is_max_level_cell = level == true_max_level;
     // Add a term for this cell, max_level cell ANCESTOR is optimization
@@ -278,7 +284,7 @@ void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
-    // assume level <= options_.true_max_level()
+    S2_DCHECK_LE(level, options_.true_max_level());
 
     // Cells in the covering are always queried as ancestor terms.
     terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));

From 0662ad18a91e937b90b2818332552faeb1209ef6 Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Tue, 10 Jan 2023 22:05:09 +0100
Subject: [PATCH 6/9] Make better coverage

---
 src/s2/s2region_term_indexer_test.cc | 44 ++++++++++++++++------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index e29f7952..e1e2cf0a 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -44,10 +44,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing");
 
 namespace {
 
-enum QueryType { POINT, CAP };
+enum DataType { POINT, CAP };
 
 void TestRandomCaps(const S2RegionTermIndexer::Options& options,
-                    QueryType query_type) {
+                    DataType index_type, DataType query_type) {
   // This function creates an index consisting either of points (if
   // options.index_contains_points_only() is true) or S2Caps of random size.
   // It then executes queries consisting of points (if query_type == POINT)
@@ -63,7 +63,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options,
     // of random size (up to a full sphere).
     S2Cap cap;
     vector<string> terms;
-    if (options.index_contains_points_only()) {
+    if (index_type == DataType::POINT) {
       cap = S2Cap::FromPoint(S2Testing::RandomPoint());
       terms = indexer.GetIndexTerms(cap.center(), "");
     } else {
@@ -84,7 +84,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options,
     // random size.
     S2Cap cap;
     vector<string> terms;
-    if (query_type == QueryType::POINT) {
+    if (query_type == DataType::POINT) {
       cap = S2Cap::FromPoint(S2Testing::RandomPoint());
       terms = indexer.GetQueryTerms(cap.center(), "");
     } else {
@@ -112,24 +112,29 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options,
          static_cast<double>(query_terms) / absl::GetFlag(FLAGS_iters));
 }
 
-using TestCase = std::tuple<QueryType, bool, bool, bool>;
+using TestCase = std::tuple<DataType, DataType, bool, bool, bool>;
 
 class S2RegionTermIndexerTest : public testing::TestWithParam<TestCase> {
 protected:
   void SetUp() override {
-    query_type = std::get<0>(GetParam());
-    options.set_optimize_for_space(std::get<1>(GetParam()));
-    options.set_index_contains_points_only(std::get<2>(GetParam()));
-    options.set_query_contains_points_only(std::get<3>(GetParam()));
-    if (query_type != QueryType::POINT &&
-        options.query_contains_points_only()) {
-      GTEST_SKIP() << "Case query_type != QueryType::POINT && "
+    index_type = std::get<0>(GetParam());
+    query_type = std::get<1>(GetParam());
+    options.set_optimize_for_space(std::get<2>(GetParam()));
+    options.set_index_contains_points_only(std::get<3>(GetParam()));
+    options.set_query_contains_points_only(std::get<4>(GetParam()));
+    if (index_type != DataType::POINT && options.index_contains_points_only()) {
+      GTEST_SKIP() << "Case index_type != DataType::POINT && "
+                      "options.index_contains_points_only() is invalid.";
+    }
+    if (query_type != DataType::POINT && options.query_contains_points_only()) {
+      GTEST_SKIP() << "Case query_type != DataType::POINT && "
                       "options.query_contains_points_only() is invalid.";
     }
   }
 
   S2RegionTermIndexer::Options options;
-  QueryType query_type{};
+  DataType index_type{};
+  DataType query_type{};
 };
 
 // We run one test case for each combination:
@@ -141,32 +146,33 @@ class S2RegionTermIndexerTest : public testing::TestWithParam<TestCase> {
 // so we skip it
 INSTANTIATE_TEST_CASE_P(
     S2RegionTermIndexerTests, S2RegionTermIndexerTest,
-    testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP),
+    testing::Combine(testing::Values(DataType::POINT, DataType::CAP),
+                     testing::Values(DataType::POINT, DataType::CAP),
                      testing::Bool(), testing::Bool(), testing::Bool()));
 
 TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) {
-  TestRandomCaps(options, query_type);
+  TestRandomCaps(options, index_type, query_type);
 }
 
 TEST_P(S2RegionTermIndexerTest, UseFaceCells) {
   options.set_min_level(0);
   options.set_max_level(16);
   options.set_max_cells(20);
-  TestRandomCaps(options, query_type);
+  TestRandomCaps(options, index_type, query_type);
 }
 
 TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) {
   options.set_min_level(6);
   options.set_max_level(12);
   options.set_level_mod(3);
-  TestRandomCaps(options, query_type);
+  TestRandomCaps(options, index_type, query_type);
 }
 
 TEST_P(S2RegionTermIndexerTest, UseLeafCells) {
   options.set_min_level(4);
   options.set_max_level(S2CellId::kMaxLevel);
   options.set_max_cells(8);
-  TestRandomCaps(options, query_type);
+  TestRandomCaps(options, index_type, query_type);
 }
 
 TEST_P(S2RegionTermIndexerTest, UseFaceCells2) {
@@ -174,7 +180,7 @@ TEST_P(S2RegionTermIndexerTest, UseFaceCells2) {
   options.set_max_level(S2CellId::kMaxLevel);
   options.set_level_mod(2);
   options.set_max_cells(20);
-  TestRandomCaps(options, query_type);
+  TestRandomCaps(options, index_type, query_type);
 }
 
 TEST(S2RegionTermIndexer, MarkerCharacter) {

From a82070ca469e763609b027441515cce9cb949cf6 Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Tue, 10 Jan 2023 22:11:19 +0100
Subject: [PATCH 7/9] Fix comment

---
 src/s2/s2region_term_indexer_test.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index e1e2cf0a..0d09bcdd 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -138,12 +138,11 @@ class S2RegionTermIndexerTest : public testing::TestWithParam<TestCase> {
 };
 
 // We run one test case for each combination:
-// QueryType: POINT, CAP
+// index_type: POINT, CAP
+// query_type: POINT, CAP
 // optimize_for_space: false, true
 // index_contains_points_only: false, true
 // query_contains_points_only: false, true
-// Case QueryType != Cap and query_contains_points_only == true is invalid,
-// so we skip it
 INSTANTIATE_TEST_CASE_P(
     S2RegionTermIndexerTests, S2RegionTermIndexerTest,
     testing::Combine(testing::Values(DataType::POINT, DataType::CAP),

From f185b9259d66d6152356ac4b34cf86b5f7ec5a4a Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Tue, 10 Jan 2023 22:22:53 +0100
Subject: [PATCH 8/9] Make better test case names

---
 src/s2/s2region_term_indexer_test.cc | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc
index 0d09bcdd..65cd6f6b 100644
--- a/src/s2/s2region_term_indexer_test.cc
+++ b/src/s2/s2region_term_indexer_test.cc
@@ -27,6 +27,7 @@
 
 #include "absl/container/flat_hash_map.h"
 #include "absl/flags/flag.h"
+#include "absl/strings/str_cat.h"
 
 #include "s2/base/commandlineflags.h"
 #include "s2/base/logging.h"
@@ -44,7 +45,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing");
 
 namespace {
 
-enum DataType { POINT, CAP };
+enum DataType {
+  POINT = 0,
+  CAP = 1,
+};
 
 void TestRandomCaps(const S2RegionTermIndexer::Options& options,
                     DataType index_type, DataType query_type) {
@@ -147,7 +151,13 @@ INSTANTIATE_TEST_CASE_P(
     S2RegionTermIndexerTests, S2RegionTermIndexerTest,
     testing::Combine(testing::Values(DataType::POINT, DataType::CAP),
                      testing::Values(DataType::POINT, DataType::CAP),
-                     testing::Bool(), testing::Bool(), testing::Bool()));
+                     testing::Bool(), testing::Bool(), testing::Bool()),
+    [](const testing::TestParamInfo<TestCase>& info) {
+      return absl::StrCat(
+          "Index", std::get<0>(info.param), "Query", std::get<1>(info.param),
+          "SpaceOpt", std::get<2>(info.param), "IndexOpt",
+          std::get<3>(info.param), "QueryOpt", std::get<4>(info.param));
+    });
 
 TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) {
   TestRandomCaps(options, index_type, query_type);

From eeec3ea4f1004d0c2f0fe5f511cc99331092664c Mon Sep 17 00:00:00 2001
From: Valery Mironov <32071355+MBkkt@users.noreply.github.com>
Date: Wed, 11 Jan 2023 10:07:54 +0100
Subject: [PATCH 9/9] Benchmark

---
 CMakeLists.txt                             | 37 ++++++------
 src/s2/s2region_term_indexer_benchmark.cpp | 67 ++++++++++++++++++++++
 2 files changed, 88 insertions(+), 16 deletions(-)
 create mode 100644 src/s2/s2region_term_indexer_benchmark.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4044cd90..cad8ef6a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ endif()
 # end up defined differently.  There is probably a better way to achieve
 # this than assuming what absl used. 
 # Using CACHE allows the user to override the default.
-set(CMAKE_CXX_STANDARD 11 CACHE STRING "The C++ standard to build with")
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # No compiler-specific extensions, i.e. -std=c++11, not -std=gnu++11.
 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -73,6 +73,7 @@ endif()
 # add_subdirectory(s2-submodule)
 if (NOT TARGET absl::base)
     find_package(absl REQUIRED)
+    find_package(GTest)
 endif()
 find_package(OpenSSL REQUIRED)
 # pthreads isn't used directly, but this is still required for std::thread.
@@ -216,13 +217,11 @@ add_library(s2
             src/s2/util/math/mathutil.cc
             src/s2/util/units/length-units.cc)
 
-if (GTEST_ROOT)
   add_library(s2testing STATIC
               src/s2/s2builderutil_testing.cc
               src/s2/s2shapeutil_testing.cc
               src/s2/s2testing.cc
               src/s2/thread_testing.cc)
-endif()
 
 target_link_libraries(
     s2
@@ -248,13 +247,11 @@ target_link_libraries(
     absl::utility
     ${CMAKE_THREAD_LIBS_INIT})
 
-if (GTEST_ROOT)
   target_link_libraries(
       s2testing
       ${GFLAGS_LIBRARIES} ${GLOG_LIBRARIES}
       absl::memory
       absl::strings)
-endif()
 
 # Allow other CMake projects to use this one with:
 # list(APPEND CMAKE_MODULE_PATH "<path_to_s2geometry_dir>/third_party/cmake")
@@ -428,22 +425,13 @@ install(FILES src/s2/util/units/length-units.h
               src/s2/util/units/physical-units.h
         DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/s2/util/units")
 
-if (GTEST_ROOT)
   set(S2_TARGETS s2 s2testing)
-else()
-  set(S2_TARGETS s2)
-endif()
 
 install(TARGETS ${S2_TARGETS}
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
         ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
 
-message("GTEST_ROOT: ${GTEST_ROOT}")
-if (GTEST_ROOT)
-  add_subdirectory(${GTEST_ROOT} build_gtest)
-  include_directories(${GTEST_ROOT}/include)
-
   set(S2TestFiles
       src/s2/encoded_s2cell_id_vector_test.cc
       src/s2/encoded_s2point_vector_test.cc
@@ -570,10 +558,9 @@ if (GTEST_ROOT)
         absl::span
         absl::strings
         absl::synchronization
-        gtest_main)
+        GTest::gtest_main)
     add_test(${test} ${test})
   endforeach()
-endif()
 
 if (BUILD_EXAMPLES AND TARGET s2testing)
   add_subdirectory("doc/examples" examples)
@@ -582,3 +569,21 @@ endif()
 if (${SWIG_FOUND} AND ${Python3_FOUND})
   add_subdirectory("src/python" python)
 endif()
+
+find_package(benchmark)
+
+add_executable(s2region_term_indexer_benchmark src/s2/s2region_term_indexer_benchmark.cpp)
+target_link_libraries(
+        s2region_term_indexer_benchmark
+        PUBLIC
+        s2testing s2
+        absl::base
+        absl::btree
+        absl::core_headers
+        absl::flags_reflection
+        absl::memory
+        absl::span
+        absl::strings
+        absl::synchronization
+        benchmark::benchmark
+        benchmark::benchmark_main)
diff --git a/src/s2/s2region_term_indexer_benchmark.cpp b/src/s2/s2region_term_indexer_benchmark.cpp
new file mode 100644
index 00000000..98f602a8
--- /dev/null
+++ b/src/s2/s2region_term_indexer_benchmark.cpp
@@ -0,0 +1,67 @@
+#include "s2region_term_indexer.h"
+#include "s2testing.h"
+#include <benchmark/benchmark.h>
+
+static void BM_OldApiGetIndexTerms(benchmark::State &state) {
+  S2RegionTermIndexer indexer;
+  benchmark::DoNotOptimize(indexer);
+  auto const count = state.range(0);
+  for (auto _ : state) {
+    for (int64_t i = 0; i < count; ++i) {
+      auto value = indexer.GetIndexTerms(S2Testing::RandomPoint(), {});
+      benchmark::DoNotOptimize(value);
+    }
+  }
+}
+
+BENCHMARK(BM_OldApiGetIndexTerms)->RangeMultiplier(4)->Range(1, 65536);
+
+static void BM_NewApiGetIndexTerms(benchmark::State &state) {
+  S2RegionTermIndexer indexer;
+  benchmark::DoNotOptimize(indexer);
+  auto const count = state.range(0);
+  for (auto _ : state) {
+    std::vector<std::string> terms;
+    benchmark::DoNotOptimize(terms);
+    for (int64_t i = 0; i < count; ++i) {
+      terms.clear();
+      indexer.GetIndexTerms(S2Testing::RandomPoint(), {}, &terms);
+      benchmark::DoNotOptimize(terms);
+    }
+  }
+}
+
+BENCHMARK(BM_NewApiGetIndexTerms)->RangeMultiplier(4)->Range(1, 65536);
+
+static void BM_OldApiGetQueryTerms(benchmark::State &state) {
+  S2RegionTermIndexer indexer;
+  benchmark::DoNotOptimize(indexer);
+  auto const count = state.range(0);
+  for (auto _ : state) {
+    for (int64_t i = 0; i < count; ++i) {
+      auto value = indexer.GetQueryTerms(S2Testing::RandomPoint(), {});
+      benchmark::DoNotOptimize(value);
+    }
+  }
+}
+
+BENCHMARK(BM_OldApiGetQueryTerms)->RangeMultiplier(4)->Range(1, 65536);
+
+static void BM_NewApiGetQueryTerms(benchmark::State &state) {
+  S2RegionTermIndexer indexer;
+  benchmark::DoNotOptimize(indexer);
+  auto const count = state.range(0);
+  for (auto _ : state) {
+    std::vector<std::string> terms;
+    benchmark::DoNotOptimize(terms);
+    for (int64_t i = 0; i < count; ++i) {
+      terms.clear();
+      indexer.GetQueryTerms(S2Testing::RandomPoint(), {}, &terms);
+      benchmark::DoNotOptimize(terms);
+    }
+  }
+}
+
+BENCHMARK(BM_NewApiGetQueryTerms)->RangeMultiplier(4)->Range(1, 65536);
+
+BENCHMARK_MAIN();
\ No newline at end of file