google · MBkkt · Jan 6, 2023 · Jan 9, 2023 · Jan 9, 2023 · Jan 10, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -20,7 +20,7 @@ endif()
 # end up defined differently.  There is probably a better way to achieve
 # this than assuming what absl used. 
 # Using CACHE allows the user to override the default.
-set(CMAKE_CXX_STANDARD 11 CACHE STRING "The C++ standard to build with")
+set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard to build with")
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # No compiler-specific extensions, i.e. -std=c++11, not -std=gnu++11.
 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -73,6 +73,7 @@ endif()
 # add_subdirectory(s2-submodule)
 if (NOT TARGET absl::base)
     find_package(absl REQUIRED)
+    find_package(GTest REQUIRED)
 endif()
 find_package(OpenSSL REQUIRED)
 # pthreads isn't used directly, but this is still required for std::thread.
@@ -216,13 +217,11 @@ add_library(s2
             src/s2/util/math/mathutil.cc
             src/s2/util/units/length-units.cc)
 
-if (GTEST_ROOT)
   add_library(s2testing STATIC
               src/s2/s2builderutil_testing.cc
               src/s2/s2shapeutil_testing.cc
               src/s2/s2testing.cc
               src/s2/thread_testing.cc)
-endif()
 
 target_link_libraries(
     s2
@@ -248,13 +247,11 @@ target_link_libraries(
     absl::utility
     ${CMAKE_THREAD_LIBS_INIT})
 
-if (GTEST_ROOT)
   target_link_libraries(
       s2testing
       ${GFLAGS_LIBRARIES} ${GLOG_LIBRARIES}
       absl::memory
       absl::strings)
-endif()
 
 # Allow other CMake projects to use this one with:
 # list(APPEND CMAKE_MODULE_PATH "<path_to_s2geometry_dir>/third_party/cmake")
@@ -428,22 +425,13 @@ install(FILES src/s2/util/units/length-units.h
               src/s2/util/units/physical-units.h
         DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/s2/util/units")
 
-if (GTEST_ROOT)
   set(S2_TARGETS s2 s2testing)
-else()
-  set(S2_TARGETS s2)
-endif()
 
 install(TARGETS ${S2_TARGETS}
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
         ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
 
-message("GTEST_ROOT: ${GTEST_ROOT}")
-if (GTEST_ROOT)
-  add_subdirectory(${GTEST_ROOT} build_gtest)
-  include_directories(${GTEST_ROOT}/include)
-
   set(S2TestFiles
       src/s2/encoded_s2cell_id_vector_test.cc
       src/s2/encoded_s2point_vector_test.cc
@@ -570,10 +558,9 @@ if (GTEST_ROOT)
         absl::span
         absl::strings
         absl::synchronization
-        gtest_main)
+        GTest::gtest_main)
     add_test(${test} ${test})
   endforeach()
-endif()
 
 if (BUILD_EXAMPLES AND TARGET s2testing)
   add_subdirectory("doc/examples" examples)
@@ -582,3 +569,21 @@ endif()
 if (${SWIG_FOUND} AND ${Python3_FOUND})
   add_subdirectory("src/python" python)
 endif()
+
+# The term-indexer benchmark is optional: it is only built when Google
+# Benchmark is installed, so a missing package does not break configuration.
+find_package(benchmark)
+if (benchmark_FOUND)
+  add_executable(s2region_term_indexer_benchmark
+                 src/s2/s2region_term_indexer_benchmark.cpp)
+  # PRIVATE: an executable has no consumers to propagate dependencies to.
+  target_link_libraries(
+      s2region_term_indexer_benchmark
+      PRIVATE
+      s2testing s2
+      absl::base absl::btree absl::core_headers
+      absl::flags_reflection absl::memory absl::span
+      absl::strings absl::synchronization
+      benchmark::benchmark
+      benchmark::benchmark_main)
+endif()
diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc
@@ -61,6 +61,12 @@
 //   never be any document regions larger than the query region.  This can
 //   significantly reduce the size of queries.
 //
+// + If the queries will contain only points (rather than general regions),
+//   then we can skip all the ancestor terms mentioned above (except for the
+//   last cell; see `GetIndexTerms(const S2Point& point...` for details),
+//   because no query can be larger than a document region.  This can
+//   significantly reduce the size of the index.
+//
 // + If it is more important to optimize index size rather than query speed,
 //   the number of index terms can be reduced by creating ancestor terms only
 //   for the *proper* ancestors of the cells in a document region, and
@@ -126,6 +132,14 @@ string S2RegionTermIndexer::GetTerm(TermType term_type, const S2CellId id,
 
 vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
                                                   string_view prefix) {
+  vector<string> terms;
+  GetIndexTerms(point, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
+                                        string_view prefix,
+                                        vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
   //
   // The last cell generated by this loop is effectively the covering for
@@ -136,12 +150,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
   // max_level() != true_max_level() (see S2RegionCoverer::Options).
 
   const S2CellId id(point);
-  vector<string> terms;
-  for (int level = options_.min_level(); level <= options_.max_level();
-       level += options_.level_mod()) {
-    terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+  int level = options_.min_level();
+  if (options_.query_contains_points_only()) {
+    level = options_.true_max_level();
+  }
+  for (; level <= options_.max_level(); level += options_.level_mod()) {
+    terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
@@ -154,6 +169,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
 
 vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     const S2CellUnion& covering, string_view prefix) {
+  vector<string> terms;
+  GetIndexTermsForCanonicalCovering(covering, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
+    const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
   //
   // Cells in the covering are normally indexed as covering terms.  If we are
@@ -168,7 +190,6 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     *coverer_.mutable_options() = options_;
     S2_CHECK(coverer_.IsCanonical(covering));
   }
-  vector<string> terms;
   S2CellId prev_id = S2CellId::None();
   int true_max_level = options_.true_max_level();
   for (S2CellId id : covering) {
@@ -178,14 +199,20 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
+    S2_DCHECK_LE(level, options_.true_max_level());
 
-    if (level < true_max_level) {
-      // Add a covering term for this cell.
-      terms.push_back(GetTerm(TermType::COVERING, id, prefix));
-    }
-    if (level == true_max_level || !options_.optimize_for_space()) {
-      // Add an ancestor term for this cell at the constrained level.
-      terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+    const bool is_max_level_cell = level == true_max_level;
+    // Add a term for this cell; a true_max_level cell uses an ANCESTOR term.
+    terms->push_back(GetTerm(is_max_level_cell ? TermType::ANCESTOR
+                                               : TermType::COVERING,
+                             id, prefix));
+
+    // If queries contain only points, no other terms are needed.
+    if (options_.query_contains_points_only()) continue;
+
+    if (!options_.optimize_for_space() && !is_max_level_cell) {
+      // Add an ancestor term for this cell.
+      terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
     }
     // Finally, add ancestor terms for all the ancestors of this cell.
     while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -194,29 +221,34 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
           prev_id.parent(level) == ancestor_id) {
         break;  // We have already processed this cell and its ancestors.
       }
-      terms.push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
+      terms->push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
     }
     prev_id = id;
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
                                                   string_view prefix) {
+  vector<string> terms;
+  GetQueryTerms(point, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
+                                        string_view prefix,
+                                        vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
 
   const S2CellId id(point);
-  vector<string> terms;
   // Recall that all true_max_level() cells are indexed only as ancestor terms.
   int level = options_.true_max_level();
-  terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
-  if (options_.index_contains_points_only()) return terms;
+  terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
+  if (options_.index_contains_points_only()) return;
 
   // Add covering terms for all the ancestor cells.
   for (; level >= options_.min_level(); level -= options_.level_mod()) {
-    terms.push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
+    terms->push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
   }
-  return terms;
 }
 
 vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
@@ -229,13 +261,20 @@ vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
 
 vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     const S2CellUnion& covering, string_view prefix) {
+  vector<string> terms;
+  GetQueryTermsForCanonicalCovering(covering, prefix, &terms);
+  return terms;
+}
+
+void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
+    const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
   // See the top of this file for an overview of the indexing strategy.
 
+  S2_CHECK(!options_.query_contains_points_only());
   if (google::DEBUG_MODE) {
     *coverer_.mutable_options() = options_;
     S2_CHECK(coverer_.IsCanonical(covering));
   }
-  vector<string> terms;
   S2CellId prev_id = S2CellId::None();
   int true_max_level = options_.true_max_level();
   for (S2CellId id : covering) {
@@ -245,18 +284,19 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
     S2_DCHECK_GE(level, options_.min_level());
     S2_DCHECK_LE(level, options_.max_level());
     S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
+    S2_DCHECK_LE(level, options_.true_max_level());
 
     // Cells in the covering are always queried as ancestor terms.
-    terms.push_back(GetTerm(TermType::ANCESTOR, id, prefix));
+    terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
 
     // If the index only contains points, there are no covering terms.
     if (options_.index_contains_points_only()) continue;
 
     // If we are optimizing for index space rather than query time, cells are
     // also queried as covering terms (except for true_max_level() cells,
     // which are indexed and queried as ancestor cells only).
-    if (options_.optimize_for_space() && level < true_max_level) {
-      terms.push_back(GetTerm(TermType::COVERING, id, prefix));
+    if (options_.optimize_for_space() && level != true_max_level) {
+      terms->push_back(GetTerm(TermType::COVERING, id, prefix));
     }
     // Finally, add covering terms for all the ancestors of this cell.
     while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -265,9 +305,8 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
           prev_id.parent(level) == ancestor_id) {
         break;  // We have already processed this cell and its ancestors.
       }
-      terms.push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
+      terms->push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
     }
     prev_id = id;
   }
-  return terms;
 }
diff --git a/src/s2/s2region_term_indexer.h b/src/s2/s2region_term_indexer.h
@@ -196,8 +196,21 @@ class S2RegionTermIndexer {
     // this flag if your index consists entirely of points.)
     //
     // DEFAULT: false
-    bool index_contains_points_only() const { return points_only_; }
-    void set_index_contains_points_only(bool value) { points_only_ = value; }
+    bool index_contains_points_only() const { return index_points_only_; }
+    void set_index_contains_points_only(bool value) { index_points_only_ = value; }
+
+    // If your queries will only contain points (rather than regions), be
+    // sure to set this flag.  This will generate a smaller and faster index
+    // that is specialized for the points-only case.
+    //
+    // With the default quality settings, this flag reduces the number of
+    // index terms by about a factor of two.  (The improvement gets smaller
+    // as max_cells() is increased, but there is really no reason not to use
+    // this flag if your queries consist entirely of points.)
+    //
+    // DEFAULT: false
+    bool query_contains_points_only() const { return query_points_only_; }
+    void set_query_contains_points_only(bool value) { query_points_only_ = value; }
 
     // If true, the index will be optimized for space rather than for query
     // time.  With the default quality settings, this flag reduces the number
@@ -221,7 +234,8 @@ class S2RegionTermIndexer {
     void set_marker_character(char ch);
 
    private:
-    bool points_only_ = false;
+    bool index_points_only_ = false;
+    bool query_points_only_ = false;
     bool optimize_for_space_ = false;
     std::string marker_ = std::string(1, '$');
   };
@@ -287,6 +301,21 @@ class S2RegionTermIndexer {
   std::vector<std::string> GetQueryTermsForCanonicalCovering(
       const S2CellUnion& covering, absl::string_view prefix);
 
+  // Like the value-returning S2Point overloads, but these append the terms to
+  // "terms", so one buffer can serve many points (e.g. a GeoJSON MultiPoint).
+  void GetIndexTerms(const S2Point& point, absl::string_view prefix,
+                     std::vector<std::string>* terms);
+  void GetQueryTerms(const S2Point& point, absl::string_view prefix,
+                     std::vector<std::string>* terms);
+
+  // Buffer-reusing variants for coverings: terms are appended to "terms".
+  void GetIndexTermsForCanonicalCovering(const S2CellUnion& covering,
+                                         absl::string_view prefix,
+                                         std::vector<std::string>* terms);
+  void GetQueryTermsForCanonicalCovering(const S2CellUnion& covering,
+                                         absl::string_view prefix,
+                                         std::vector<std::string>* terms);
+
  private:
   enum TermType { ANCESTOR, COVERING };