diff --git a/geographyBench/pom.xml b/geographyBench/pom.xml new file mode 100644 index 0000000000..553d3db6c0 --- /dev/null +++ b/geographyBench/pom.xml @@ -0,0 +1,261 @@ + + + + 4.0.0 + + org.apache.sedona + sedona-parent + 1.8.0-SNAPSHOT + ../pom.xml + + geographyBench + + ${project.groupId}:${project.artifactId} + A benchmarking module for geography operations. http://sedona.apache.org/ + jar + + + ${skip.deploy.common.modules} + ${java.version} + ${java.version} + 1.37 + + + + + org.apache.sedona + sedona-common + ${project.version} + + + com.fasterxml.jackson.core + * + + + it.geosolutions.jaiext.jiffle + * + + + org.codehaus.janino + * + + + + + org.apache.sedona + sedona-spark-common-${spark.compat.version}_${scala.compat.version} + ${project.version} + + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + org.apache.commons + commons-math3 + + + org.slf4j + slf4j-api + + compile + + + + org.slf4j + slf4j-simple + 1.7.32 + runtime + + + org.geotools + gt-main + + + org.geotools + gt-referencing + + + org.geotools + gt-epsg-hsql + + + org.geotools + gt-geotiff + + + org.geotools + gt-arcgrid + + + org.geotools + gt-process-feature + + + org.locationtech.jts + jts-core + + + org.wololo + jts2geojson + + + org.locationtech.spatial4j + spatial4j + + + + + + org.datasyslab + s2-geometry-library + + + com.uber + h3 + + + com.esotericsoftware + kryo + compile + + + net.sf.geographiclib + GeographicLib-Java + + + com.github.ben-manes.caffeine + caffeine + + + it.geosolutions.jaiext.jiffle + jt-jiffle-language + + + edu.ucar + cdm-core + + + + org.codehaus.janino + janino + ${janino-version} + + + tools.profiler + async-profiler + 4.1 + compile + + + + src/jmh/java + + + org.apache.maven.plugins + maven-compiler-plugin + + 11 + 11 + + + + + io.paradoxical + resolved-pom-maven-plugin + 1.0 + + + resolve-my-pom + none + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + 
+ + + + org.openjdk.jmh.Main + + + + + it.geosolutions.jaiext.jiffle + org.apache.sedona.shaded.jiffle + + it.geosolutions.jaiext.jiffle.runtime.* + + + + org.antlr.v4.runtime + org.apache.sedona.shaded.antlr + + + org.codehaus + org.apache.sedona.shaded.codehaus + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + diff --git a/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchGeogToGeom.java b/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchGeogToGeom.java new file mode 100644 index 0000000000..7e1a67804b --- /dev/null +++ b/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchGeogToGeom.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
package org.apache.sedona.bench;

import com.google.common.geometry.S2LatLng;
import com.google.common.geometry.S2Loop;
import com.google.common.geometry.S2Point;
import com.google.common.geometry.S2Polygon;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import one.profiler.AsyncProfiler;
import org.apache.sedona.common.S2Geography.Geography;
import org.apache.sedona.common.S2Geography.Geography.GeographyKind;
import org.apache.sedona.common.S2Geography.MultiPolygonGeography;
import org.apache.sedona.common.S2Geography.PolygonGeography;
import org.apache.sedona.common.geography.Constructors;
import org.locationtech.jts.geom.Geometry;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.BenchmarkParams;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.infra.IterationParams;
import org.openjdk.jmh.runner.IterationType;

/**
 * Benchmarks converting Sedona S2Geography -> JTS Geometry:
 *
 * <ul>
 *   <li>PolygonGeography -> JTS Polygon
 *   <li>MultiPolygonGeography -> JTS MultiPolygon
 * </ul>
 *
 * <p>Requirements: JMH on the classpath; async-profiler + ap-loader only if the optional profiler
 * hook should actually record anything.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@State(Scope.Thread)
public class BenchGeogToGeom {

  // ============== Params ==============
  /** Number of polygons used to build the MultiPolygon geography. */
  @Param({"1", "16", "256", "1024"})
  public int numPolygons;

  /** Vertices per synthetic polygon shell. */
  @Param({"4", "16", "256", "1024"})
  public int verticesPerPolygon;

  /** Kept for parity with the other benches; S2 polygons live on the sphere regardless. */
  @Param({"XY", "XYZ"})
  public String dimension;

  /** Kept for parity with your other benches; not used by this conversion. */
  @Param({"COMPACT"})
  public String pointEncoding;

  // ============== Reused data ==============
  private List<S2Polygon> polygons; // synthetic S2 polygons
  private Geography polygonGeog; // Geography (single polygon)
  private Geography multiPolygonGeog; // Geography (multi polygon)

  // ============== Setup ==============
  @Setup(Level.Trial)
  public void setup() {
    this.polygons = buildPolygons(numPolygons, verticesPerPolygon);

    // Single polygon geography (use the first polygon)
    this.polygonGeog = new PolygonGeography(polygons.get(0));

    // MultiPolygon geography (all polygons)
    this.multiPolygonGeog = new MultiPolygonGeography(GeographyKind.MULTIPOLYGON, polygons);
  }

  // ============== Benchmarks ==============
  /** Convert a single PolygonGeography to JTS Geometry. */
  @Benchmark
  public void geogPolygon_toJts(ProfilerHook ph, Blackhole bh) {
    Geometry jts = Constructors.geogToGeometry(polygonGeog);
    bh.consume(jts);
  }

  /** Convert a MultiPolygonGeography to JTS Geometry. */
  @Benchmark
  public void geogMultiPolygon_toJts(ProfilerHook ph, Blackhole bh) {
    Geometry jts = Constructors.geogToGeometry(multiPolygonGeog);
    bh.consume(jts);
  }

  // ============== Helpers ==============
  /**
   * Builds non-overlapping S2Polygons by sampling each shell on a small circle around a grid of
   * centers. Each polygon is a single outer loop; no holes are added for simplicity.
   *
   * <p>FIX: centers are laid out on a lat/lng grid instead of the original single diagonal
   * {@code (-60 + j*0.5, -170 + j*0.5)}. That diagonal exceeds the valid |lat| &lt;= 90 range once
   * j &gt; 300, so {@code numPolygons = 1024} fed invalid coordinates to
   * {@link S2LatLng#fromDegrees}.
   *
   * @param count number of polygons (clamped to at least 1)
   * @param verticesPerPolygon vertices per shell (clamped to at least 3)
   * @return freshly built polygons, one shell each
   */
  public static List<S2Polygon> buildPolygons(int count, int verticesPerPolygon) {
    int n = Math.max(1, count);
    int v = Math.max(3, verticesPerPolygon);
    List<S2Polygon> result = new ArrayList<>(n);
    double radiusDeg = 0.1; // small circle around each center

    // Longitude slots: -170.0 .. 169.5 in 0.5-degree steps; start a new latitude row per 680.
    final int cols = 680;

    for (int j = 0; j < n; j++) {
      // spread centers on a grid to avoid overlaps while staying inside valid lat/lng bounds
      double centerLat = -60.0 + (j / cols) * 0.5;
      double centerLng = -170.0 + (j % cols) * 0.5;

      List<S2Point> verts = new ArrayList<>(v);
      for (int i = 0; i < v; i++) {
        double angle = (2.0 * Math.PI * i) / v;
        double lat = centerLat + radiusDeg * Math.cos(angle);
        double lng = centerLng + radiusDeg * Math.sin(angle);
        verts.add(S2LatLng.fromDegrees(lat, lng).toPoint());
      }

      S2Loop shell = new S2Loop(verts);
      shell.normalize(); // CCW for shells
      result.add(new S2Polygon(shell));
    }
    return result;
  }

  // ============== Async-profiler hook (optional) ==============
  /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */
  @State(Scope.Benchmark)
  public static class ProfilerHook {
    @Param({"cpu"})
    public String event;

    @Param({"jfr"})
    public String format;

    @Param({"1ms"})
    public String interval;

    private AsyncProfiler profiler;
    private Path outDir;

    @Setup(Level.Trial)
    public void trial() throws Exception {
      profiler = AsyncProfiler.getInstance();
      outDir = Paths.get("profiles");
      Files.createDirectories(outDir);
    }

    @Setup(Level.Iteration)
    public void start(BenchmarkParams b, IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event);
      File out =
          outDir
              .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html"))
              .toAbsolutePath()
              .toFile();

      // Using 'all-user' helps the profiler find the correct forked JMH process.
      // The filter is removed to avoid accidentally hiding the benchmark thread.
      String common = String.format("event=%s,interval=%s,all-user", event, interval);

      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath());
      } else {
        profiler.execute("start," + common);
        System.setProperty("ap.out", out.getAbsolutePath());
        System.setProperty("ap.format", format);
      }
    }

    @TearDown(Level.Iteration)
    public void stop(IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("stop");
      } else {
        String file = System.getProperty("ap.out");
        String fmt = System.getProperty("ap.format", "flamegraph");
        profiler.execute(String.format("stop,file=%s,output=%s", file, fmt));
      }
    }
  }
}
package org.apache.sedona.bench;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import one.profiler.AsyncProfiler;
import org.apache.sedona.common.S2Geography.Geography;
import org.apache.sedona.common.geography.Constructors;
import org.locationtech.jts.geom.*;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.BenchmarkParams;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.infra.IterationParams;
import org.openjdk.jmh.runner.IterationType;

/**
 * Benchmarks converting JTS Geometry -> Sedona S2Geography:
 *
 * <ul>
 *   <li>Polygon -> PolygonGeography
 *   <li>MultiPolygon -> MultiPolygonGeography
 * </ul>
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@State(Scope.Thread)
public class BenchGeomToGeog {

  // ============== Params ==============
  /**
   * Number of polygons in the MultiPolygon benchmark. Polygon benchmark uses the first polygon
   * only.
   */
  @Param({"1", "16", "256", "1024"})
  public int numPolygons;

  /** Vertices per polygon outer ring. */
  @Param({"4", "16", "256", "1024"})
  public int verticesPerPolygon;

  /** XY or XYZ coordinates in the generated JTS polygons. */
  @Param({"XY", "XYZ"})
  public String dimension;

  // ============== Reused data ==============
  private GeometryFactory gf;
  private List<Polygon> polygons; // generated JTS polygons
  private Polygon polygon; // first polygon
  private MultiPolygon multiPolygon; // all polygons as MultiPolygon

  // ============== Setup ==============
  @Setup(Level.Trial)
  public void setup() {
    // SRID=4326 for geographic degrees; PrecisionModel floating
    gf = new GeometryFactory(new PrecisionModel(PrecisionModel.FLOATING), 4326);

    polygons = buildPolygons(numPolygons, verticesPerPolygon, dimension, gf);
    polygon = polygons.get(0);

    Polygon[] arr = polygons.toArray(new Polygon[0]);
    multiPolygon = gf.createMultiPolygon(arr);
  }

  // ============== Benchmarks ==============
  /** Convert a single JTS Polygon to a Geography. */
  @Benchmark
  public void geomPolygon_toGeog(ProfilerHook ph, Blackhole bh) {
    Geography g = Constructors.geomToGeography(polygon);
    bh.consume(g);
  }

  /** Convert a JTS MultiPolygon to a Geography. */
  @Benchmark
  public void geomMultiPolygon_toGeog(ProfilerHook ph, Blackhole bh) {
    Geography g = Constructors.geomToGeography(multiPolygon);
    bh.consume(g);
  }

  // ============== Helpers ==============
  /**
   * Builds non-overlapping JTS Polygons in EPSG:4326. Each polygon is a simple closed ring sampled
   * on a small circle (no holes).
   *
   * <p>FIX: centers are laid out on a lat/lng grid instead of the original single diagonal
   * {@code (-60 + j*0.5, -170 + j*0.5)}, which produced latitudes far beyond +/-90 (up to ~451
   * degrees) for {@code count = 1024} — invalid EPSG:4326 fixtures.
   *
   * @param count number of polygons (clamped to at least 1)
   * @param verticesPerPolygon vertices per ring (clamped to at least 3; ring is auto-closed)
   * @param dimension "XYZ" adds a smoothly varying Z; anything else yields 2-D coordinates
   * @param gf factory used to create rings/polygons
   * @return freshly built polygons
   */
  public static List<Polygon> buildPolygons(
      int count, int verticesPerPolygon, String dimension, GeometryFactory gf) {
    int n = Math.max(1, count);
    int v = Math.max(3, verticesPerPolygon);
    boolean useZ = "XYZ".equalsIgnoreCase(dimension);

    List<Polygon> result = new ArrayList<>(n);
    double radiusDeg = 0.1; // small radius around each center

    // Longitude slots: -170.0 .. 169.5 in 0.5-degree steps; new latitude row per 680 polygons.
    final int cols = 680;

    for (int j = 0; j < n; j++) {
      double centerLat = -60.0 + (j / cols) * 0.5;
      double centerLng = -170.0 + (j % cols) * 0.5;

      Coordinate[] coords = new Coordinate[v + 1];
      for (int i = 0; i < v; i++) {
        double angle = (2.0 * Math.PI * i) / v;
        double lat = centerLat + radiusDeg * Math.cos(angle);
        double lng = centerLng + radiusDeg * Math.sin(angle);
        double z = useZ ? 10.0 * Math.sin(angle) : Double.NaN; // smooth Z variation
        coords[i] = useZ ? new Coordinate(lng, lat, z) : new Coordinate(lng, lat);
      }
      // close the ring (copy constructor preserves Z when present)
      coords[v] = new Coordinate(coords[0]);

      LinearRing shell = gf.createLinearRing(coords);
      result.add(gf.createPolygon(shell, null));
    }
    return result;
  }

  // ============== Async-profiler hook (optional) ==============
  /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */
  @State(Scope.Benchmark)
  public static class ProfilerHook {
    @Param({"cpu"})
    public String event;

    @Param({"jfr"})
    public String format;

    @Param({"1ms"})
    public String interval;

    private AsyncProfiler profiler;
    private Path outDir;

    @Setup(Level.Trial)
    public void trial() throws Exception {
      profiler = AsyncProfiler.getInstance();
      outDir = Paths.get("profiles");
      Files.createDirectories(outDir);
    }

    @Setup(Level.Iteration)
    public void start(BenchmarkParams b, IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event);
      File out =
          outDir
              .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html"))
              .toAbsolutePath()
              .toFile();

      // Using 'all-user' helps the profiler find the correct forked JMH process.
      // The filter is removed to avoid accidentally hiding the benchmark thread.
      String common = String.format("event=%s,interval=%s,all-user", event, interval);

      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath());
      } else {
        profiler.execute("start," + common);
        System.setProperty("ap.out", out.getAbsolutePath());
        System.setProperty("ap.format", format);
      }
    }

    @TearDown(Level.Iteration)
    public void stop(IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("stop");
      } else {
        String file = System.getProperty("ap.out");
        String fmt = System.getProperty("ap.format", "flamegraph");
        profiler.execute(String.format("stop,file=%s,output=%s", file, fmt));
      }
    }
  }
}
package org.apache.sedona.bench;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.*;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import one.profiler.AsyncProfiler;
import org.apache.sedona.common.S2Geography.Geography;
import org.apache.sedona.common.S2Geography.WKBReader;
import org.apache.sedona.common.S2Geography.WKBWriter;
import org.apache.sedona.common.S2Geography.WKTReader;
import org.locationtech.jts.io.ByteOrderValues;
import org.locationtech.jts.io.ParseException;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.BenchmarkParams;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.infra.IterationParams;
import org.openjdk.jmh.runner.IterationType;

/**
 * Benchmarks Geography WKB round-trips for POINT / MULTIPOINT: write (Geography -> WKB) via
 * WKT-built fixtures, and read (WKB -> Geography) via hand-built WKB payloads with an explicit
 * byte layout.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@State(Scope.Thread)
public class BenchPointWKB {

  /** XY or XYZ coordinate dimension. */
  @Param({"XY", "XYZ"})
  public String dim;

  /** WKB byte order for both read and write payloads. */
  @Param({"LE", "BE"})
  public String endianness;

  /** Number of points in the MULTIPOINT fixtures. */
  @Param({"1", "16", "256", "1024", "4096", "65536"})
  public int nPoints;

  // Fixtures
  private String wktPoint, wktMulti;
  private Geography geoPoint, geoMulti;

  // Hand-built payloads for READ benches (explicit WKB layout)
  private byte[] wkbReadPointLE, wkbReadPointBE;
  private byte[] wkbReadMultiLE, wkbReadMultiBE;

  @Setup(Level.Trial)
  public void setup() throws ParseException, IOException {
    wktPoint = buildPointWKT(dim);
    wktMulti = buildMultiPointWKT(nPoints, dim); // double-paren per point

    WKTReader wktReader = new WKTReader();
    geoPoint = wktReader.read(wktPoint);
    geoMulti = wktReader.read(wktMulti);

    // Precompute READ payloads with the explicit layout the reader expects
    boolean isXYZ = "XYZ".equals(dim);
    wkbReadPointLE = buildPointWKB(true, isXYZ, 0);
    wkbReadPointBE = buildPointWKB(false, isXYZ, 0);
    wkbReadMultiLE = buildMultiPointWKB(true, isXYZ, nPoints);
    wkbReadMultiBE = buildMultiPointWKB(false, isXYZ, nPoints);
  }

  // ---------------- WRITE (Geography -> WKB) ----------------

  @Benchmark
  public double wkb_write_point(Blackhole bh, BenchPointWKB.ProfilerHook ph) throws IOException {
    return write(geoPoint, bh);
  }

  @Benchmark
  public double wkb_write_multipoint(Blackhole bh, BenchPointWKB.ProfilerHook ph)
      throws IOException {
    return write(geoMulti, bh);
  }

  /** Serializes {@code g} and folds the bytes into a sum so the write cannot be dead-coded. */
  private double write(Geography g, Blackhole bh) throws IOException {
    int outDims = "XY".equals(dim) ? 2 : 3;
    int order =
        "LE".equals(endianness) ? ByteOrderValues.LITTLE_ENDIAN : ByteOrderValues.BIG_ENDIAN;
    WKBWriter writer = new WKBWriter(outDims, order);
    byte[] out = writer.write(g);
    long sum = 0;
    for (byte b : out) sum += (b & 0xFF);
    bh.consume(out);
    return (double) sum;
  }

  // ---------------- READ (WKB -> Geography) ----------------

  @Benchmark
  public void wkb_read_point(Blackhole bh, BenchPointWKB.ProfilerHook ph)
      throws IOException, ParseException {
    byte[] src = "LE".equals(endianness) ? wkbReadPointLE : wkbReadPointBE;
    readInto(src, bh);
  }

  @Benchmark
  public void wkb_read_multipoint(Blackhole bh, BenchPointWKB.ProfilerHook ph)
      throws IOException, ParseException {
    byte[] src = "LE".equals(endianness) ? wkbReadMultiLE : wkbReadMultiBE;
    readInto(src, bh);
  }

  private void readInto(byte[] src, Blackhole bh) throws IOException, ParseException {
    WKBReader reader = new WKBReader();
    Geography g = reader.read(src);
    bh.consume(g);
    bh.consume(g.numShapes());
  }

  // ---------------- Hand-built WKB for READ benches ----------------

  /** Single POINT geometry: endian(1) + type(4) + coords; no coordinate count for a point. */
  private static byte[] buildPointWKB(boolean little, boolean xyz, int index) {
    int type = xyz ? 1001 : 1;
    int doubles = xyz ? 3 : 2;
    int len = 1 + 4 + 8 * doubles;

    ByteBuffer bb =
        ByteBuffer.allocate(len).order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
    bb.put(little ? (byte) 1 : (byte) 0);
    bb.putInt(type);
    double[] c = pointCoord(index, xyz);
    bb.putDouble(c[0]);
    bb.putDouble(c[1]);
    if (xyz) bb.putDouble(c[2]);
    return bb.array();
  }

  /** MULTIPOINT: header endian(1)+type(4)+count(4), then each point as a full WKB geometry. */
  private static byte[] buildMultiPointWKB(boolean little, boolean xyz, int n) {
    int pointType = xyz ? 1001 : 1;
    int multiType = xyz ? 1004 : 4;
    int doubles = xyz ? 3 : 2;

    int header = 1 + 4 + 4; // endian + type + count
    int perPoint = 1 + 4 + 8 * doubles; // endian + type + coords

    ByteBuffer bb =
        ByteBuffer.allocate(header + n * perPoint)
            .order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);

    bb.put(little ? (byte) 1 : (byte) 0);
    bb.putInt(multiType);
    bb.putInt(n);

    for (int i = 0; i < n; i++) {
      bb.put(little ? (byte) 1 : (byte) 0); // inner endian
      bb.putInt(pointType); // inner type (POINT)
      double[] c = pointCoord(i, xyz);
      bb.putDouble(c[0]);
      bb.putDouble(c[1]);
      if (xyz) bb.putDouble(c[2]);
    }
    return bb.array();
  }

  /**
   * Deterministic coordinate for point {@code i}, shared by the WKB and WKT fixture builders so
   * the two cannot drift apart.
   *
   * <p>FIX: the original {@code y = 20 + i * 0.002} walks past the 90-degree latitude limit near
   * i = 35000, so {@code nPoints = 65536} generated non-geographic coordinates (y up to ~151).
   * The indices are wrapped to keep x in [10, 75) and y in [20, 80).
   */
  private static double[] pointCoord(int i, boolean xyz) {
    double x = 10.0 + (i % 65000) * 0.001;
    double y = 20.0 + (i % 30000) * 0.002;
    if (!xyz) return new double[] {x, y};
    double z = (i % 13) + 0.125;
    return new double[] {x, y, z};
  }

  // ---------------- WKT helpers (for writer fixtures) ----------------

  private static String buildPointWKT(String dim) {
    return "POINT" + ("XYZ".equals(dim) ? " Z" : "") + " (" + coord(0, dim) + ")";
  }

  // IMPORTANT: MULTIPOINT requires double-paren ((x y), (x y), ...)
  private static String buildMultiPointWKT(int n, String dim) {
    // FIX: always emit MULTIPOINT syntax. The original degraded n <= 1 to a bare POINT, so the
    // "multipoint" write benchmark at nPoints = 1 silently measured point writing while the read
    // benchmark used a true 1-element MULTIPOINT.
    int count = Math.max(1, n);
    String dimToken = "XYZ".equals(dim) ? " Z" : "";
    String pts =
        IntStream.range(0, count)
            .mapToObj(i -> "(" + coord(i, dim) + ")") // wrap each point!
            .collect(Collectors.joining(", "));
    return "MULTIPOINT" + dimToken + " (" + pts + ")";
  }

  /** Formats the shared {@link #pointCoord} value as a WKT coordinate tuple. */
  private static String coord(int i, String dim) {
    double[] c = pointCoord(i, !"XY".equals(dim));
    if (c.length == 2) return String.format(Locale.ROOT, "%.6f %.6f", c[0], c[1]);
    return String.format(Locale.ROOT, "%.6f %.6f %.6f", c[0], c[1], c[2]);
  }

  // -------- Sanity: confirm shape count/bytes scale with nPoints --------
  @TearDown(Level.Trial)
  public void sanity() throws IOException {
    int outDims = "XY".equals(dim) ? 2 : 3;
    int order =
        "LE".equals(endianness) ? ByteOrderValues.LITTLE_ENDIAN : ByteOrderValues.BIG_ENDIAN;
    WKBWriter w = new WKBWriter(outDims, order);
    byte[] out = w.write(geoMulti);

    int coordsPerPt = "XY".equals(dim) ? 2 : 3;
    int expectedApprox = 1 + 4 + 4 + nPoints * (1 + 4 + 8 * coordsPerPt); // header + N*(point)
    System.out.printf(
        "sanity: dim=%s order=%s n=%d shapes=%d bytes=%d (~%d)%n",
        dim, endianness, nPoints, geoMulti.numShapes(), out.length, expectedApprox);
  }

  // -------- Async-profiler hook (runs inside fork) --------
  /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */
  @State(Scope.Benchmark)
  public static class ProfilerHook {
    @Param({"cpu"})
    public String event;

    @Param({"jfr"})
    public String format;

    @Param({"1ms"})
    public String interval;

    private AsyncProfiler profiler;
    private Path outDir;

    @Setup(Level.Trial)
    public void trial() throws Exception {
      profiler = AsyncProfiler.getInstance();
      outDir = Paths.get("profiles");
      Files.createDirectories(outDir);
    }

    @Setup(Level.Iteration)
    public void start(BenchmarkParams b, IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event);
      File out =
          outDir
              .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html"))
              .toAbsolutePath()
              .toFile();

      String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval);

      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath());
      } else {
        profiler.execute("start," + common);
        System.setProperty("ap.out", out.getAbsolutePath());
        System.setProperty("ap.format", format);
      }
    }

    @TearDown(Level.Iteration)
    public void stop(IterationParams it) throws Exception {
      if (it.getType() != IterationType.MEASUREMENT) return;
      if ("jfr".equalsIgnoreCase(format)) {
        profiler.execute("stop");
      } else {
        String file = System.getProperty("ap.out");
        String fmt = System.getProperty("ap.format", "flamegraph");
        profiler.execute(String.format("stop,file=%s,output=%s", file, fmt));
      }
    }
  }
}
+ */ +package org.apache.sedona.bench; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.*; +import java.util.Locale; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import one.profiler.AsyncProfiler; +import org.apache.sedona.common.S2Geography.Geography; +import org.apache.sedona.common.S2Geography.WKBReader; +import org.apache.sedona.common.S2Geography.WKBWriter; +import org.apache.sedona.common.S2Geography.WKTReader; +import org.locationtech.jts.io.ByteOrderValues; +import org.locationtech.jts.io.ParseException; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.*; +import org.openjdk.jmh.runner.IterationType; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@State(Scope.Thread) +public class BenchPolygonWKB { + /** XY or XYZ */ + @Param({"XY", "XYZ"}) + public String dim; + + /** number of polygons in MULTIPOLYGON */ + @Param({"1", "16", "256", "1024"}) + public int nPolygons; + + /** vertices per polygon outer ring (>=4; last coord auto‑closed) */ + @Param({"4", "16", "256", "1024"}) + public int nVerticesPerRing; + + /** WKB endianness */ + @Param({"LE", "BE"}) + public String endianness; + + // ---- Fixtures (prepared once per trial) ---- + private String wktPolygon; + private String wktMultiPolygon; + private Geography geoPolygon; + private Geography geoMultiPolygon; + // Payloads for READ benchmarks are now hand-built + private byte[] wkbReadPolygonLE, wkbReadPolygonBE; + private byte[] wkbReadMultiLE, wkbReadMultiBE; + + @Setup(Level.Trial) + public void setup() throws ParseException, IOException { + int v = Math.max(4, nVerticesPerRing); + int p = Math.max(1, nPolygons); + + // WKT and Geography objects are still needed for 
the WRITE benchmarks + wktPolygon = buildPolygonWKT(v, dim, 0); + wktMultiPolygon = buildMultiPolygonWKT(p, v, dim); + WKTReader wktReader = new WKTReader(); + geoPolygon = wktReader.read(wktPolygon); + geoMultiPolygon = wktReader.read(wktMultiPolygon); + + // THE FIX: Build custom WKB for the READ benchmarks + boolean isXYZ = "XYZ".equals(dim); + wkbReadPolygonLE = buildPolygonWKB(true, isXYZ, v, 0); + wkbReadPolygonBE = buildPolygonWKB(false, isXYZ, v, 0); + wkbReadMultiLE = buildMultiPolygonWKB(true, isXYZ, p, v); + wkbReadMultiBE = buildMultiPolygonWKB(false, isXYZ, p, v); + } + + // ---- WRITE (Geography -> WKB) ---- + @Benchmark + public double wkb_write_polygon(Blackhole bh, BenchPolygonWKB.ProfilerHook ph) + throws IOException { + return write(geoPolygon, bh); + } + + @Benchmark + public double wkb_write_multipolygon(Blackhole bh, BenchPolygonWKB.ProfilerHook ph) + throws IOException { + return write(geoMultiPolygon, bh); + } + + private double write(Geography g, Blackhole bh) throws IOException { + int outDims = ("XY".equals(dim) ? 2 : 3); + int order = + "LE".equals(endianness) ? ByteOrderValues.LITTLE_ENDIAN : ByteOrderValues.BIG_ENDIAN; + WKBWriter writer = new WKBWriter(outDims, order); + byte[] out = writer.write(g); + long sum = 0; + for (byte b : out) sum += (b & 0xFF); // prevent DCE + bh.consume(out); + return (double) sum; + } + + // ---- READ (WKB -> Geography) ---- + @Benchmark + public void wkb_read_polygon(Blackhole bh, BenchPolygonWKB.ProfilerHook ph) + throws IOException, ParseException { + read(("LE".equals(endianness) ? wkbReadPolygonLE : wkbReadPolygonBE), bh); + } + + @Benchmark + public void wkb_read_multipolygon(Blackhole bh, BenchPolygonWKB.ProfilerHook ph) + throws IOException, ParseException { + read(("LE".equals(endianness) ? 
wkbReadMultiLE : wkbReadMultiBE), bh); + } + + private void read(byte[] src, Blackhole bh) throws IOException, ParseException { + WKBReader reader = new WKBReader(); + Geography g = reader.read(src); + bh.consume(g); + bh.consume(g.numShapes()); + } + + // ---- Hand-built WKB for READ benches ---- + + private static byte[] buildPolygonWKB(boolean little, boolean xyz, int nVertices, int polyIndex) { + int type = xyz ? 1003 : 3; // Polygon type + int doubles = xyz ? 3 : 2; + // endian, type, num_rings, num_coords, coords + int len = 1 + 4 + 4 + 4 + (8 * doubles * nVertices); + + ByteBuffer bb = + ByteBuffer.allocate(len).order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + bb.put(little ? (byte) 1 : (byte) 0); + bb.putInt(type); + bb.putInt(1); // num_rings = 1 (simple polygon with no holes) + bb.putInt(nVertices); + + // Build coordinates for the ring + double cx = -170.0 + polyIndex * 0.5; + double cy = -60.0 + polyIndex * 0.5; + double rDeg = 0.1 + (polyIndex % 5) * 0.02; + + for (int i = 0; i < nVertices - 1; i++) { + double ang = (2.0 * Math.PI * i) / (nVertices - 1); + double x = cx + rDeg * Math.cos(ang); + double y = cy + rDeg * Math.sin(ang); + putCoord(bb, x, y, xyz, i); + } + // Close the ring + putCoord(bb, cx + rDeg, cy, xyz, 0); + + return bb.array(); + } + + private static byte[] buildMultiPolygonWKB( + boolean little, boolean xyz, int nPolys, int nVertices) { + int multiType = xyz ? 1006 : 6; // MultiPolygon type + int header = 1 + 4 + 4; // endian, type, num_polygons + // Get the size of a single polygon WKB to calculate total length + byte[] singlePoly = buildPolygonWKB(little, xyz, nVertices, 0); + int len = header + (nPolys * singlePoly.length); + + ByteBuffer bb = + ByteBuffer.allocate(len).order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + bb.put(little ? 
(byte) 1 : (byte) 0); + bb.putInt(multiType); + bb.putInt(nPolys); + for (int i = 0; i < nPolys; i++) { + // The reader expects each polygon to be a full, self-contained WKB geometry + bb.put(buildPolygonWKB(little, xyz, nVertices, i)); + } + return bb.array(); + } + + private static void putCoord(ByteBuffer bb, double x, double y, boolean xyz, int i) { + bb.putDouble(x); + bb.putDouble(y); + if (xyz) { + double z = (i % 19) + 0.5; + bb.putDouble(z); + } + } + + // ---- Helpers: WKT generators ---- + + private static String buildPolygonWKT(int verticesPerRing, String dim, int polyIndex) { + String dimToken = "XYZ".equals(dim) ? " Z" : ""; + String ring = buildRing(verticesPerRing, dim, polyIndex); + return "POLYGON" + dimToken + " ((" + ring + "))"; + } + + private static String buildMultiPolygonWKT(int p, int verticesPerRing, String dim) { + String dimToken = "XYZ".equals(dim) ? " Z" : ""; + String polys = + IntStream.range(0, p) + .mapToObj(i -> "((" + buildRing(verticesPerRing, dim, i) + "))") + .collect(Collectors.joining(", ")); + return "MULTIPOLYGON" + dimToken + " (" + polys + ")"; + } + + private static String buildRing(int v, String dim, int polyIndex) { + if (v < 4) throw new IllegalArgumentException("Polygon ring must have >= 4 vertices"); + double cx = -170.0 + polyIndex * 0.5; + double cy = -60.0 + polyIndex * 0.5; + double rDeg = 0.1 + (polyIndex % 5) * 0.02; + + String vertices = + IntStream.range(0, v - 1) + .mapToObj( + i -> { + double ang = (2.0 * Math.PI * i) / (v - 1); + double x = cx + rDeg * Math.cos(ang); + double y = cy + rDeg * Math.sin(ang); + return formatCoord(x, y, dim, i); + }) + .collect(Collectors.joining(", ")); + + String first = formatCoord(cx + rDeg, cy, dim, 0); + return vertices + ", " + first; + } + + private static String formatCoord(double x, double y, String dim, int i) { + if ("XY".equals(dim)) { + return String.format(Locale.ROOT, "%.6f %.6f", x, y); + } else { + double z = (i % 19) + 0.5; + return 
String.format(Locale.ROOT, "%.6f %.6f %.6f", x, y, z); + } + } + + // ===================================================================== + // == Async-profiler hook (runs inside the fork) == + // ===================================================================== + + // -------- Async-profiler hook (runs inside fork) -------- + /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */ + @State(Scope.Benchmark) + public static class ProfilerHook { + @Param({"cpu"}) + public String event; + + @Param({"jfr"}) + public String format; + + @Param({"1ms"}) + public String interval; + + private AsyncProfiler profiler; + private Path outDir; + + @Setup(Level.Trial) + public void trial() throws Exception { + profiler = AsyncProfiler.getInstance(); + outDir = Paths.get("profiles"); + Files.createDirectories(outDir); + } + + @Setup(Level.Iteration) + public void start(BenchmarkParams b, IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event); + File out = + outDir + .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html")) + .toAbsolutePath() + .toFile(); + + // Using 'all-user' helps the profiler find the correct forked JMH process. + // The filter is removed to avoid accidentally hiding the benchmark thread. 
+ String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval); + + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath()); + } else { + profiler.execute("start," + common); + System.setProperty("ap.out", out.getAbsolutePath()); + System.setProperty("ap.format", format); + } + } + + @TearDown(Level.Iteration) + public void stop(IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("stop"); + } else { + String file = System.getProperty("ap.out"); + String fmt = System.getProperty("ap.format", "flamegraph"); + profiler.execute(String.format("stop,file=%s,output=%s", file, fmt)); + } + } + } +} diff --git a/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchPolylineWKB.java b/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchPolylineWKB.java new file mode 100644 index 0000000000..230d04b63d --- /dev/null +++ b/geographyBench/src/jmh/java/org/apache/sedona/bench/BenchPolylineWKB.java @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.bench; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.*; +import java.util.Locale; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import one.profiler.AsyncProfiler; +import org.apache.sedona.common.S2Geography.Geography; +import org.apache.sedona.common.S2Geography.WKBReader; +import org.apache.sedona.common.S2Geography.WKBWriter; +import org.apache.sedona.common.S2Geography.WKTReader; +import org.locationtech.jts.io.ByteOrderValues; +import org.locationtech.jts.io.ParseException; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.*; +import org.openjdk.jmh.runner.IterationType; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@State(Scope.Thread) +public class BenchPolylineWKB { + + /** XY or XYZ */ + @Param({"XY", "XYZ"}) + public String dim; + + /** number of lines in MULTILINESTRING */ + @Param({"1", "16", "256", "1024"}) + public int nLines; + + /** number of vertices per line (min 2) */ + @Param({"2", "16", "256", "1024"}) + public int nVerticesPerLine; + + /** WKB endianness */ + @Param({"LE", "BE"}) + public String endianness; + + // ---- Fixtures (prepared once per trial) ---- + private String wktLine; + private String wktMulti; + private Geography geoLine; + private Geography geoMulti; + private byte[] wkbReadSingleLE, wkbReadSingleBE; + private byte[] wkbReadMultiLE, wkbReadMultiBE; + + @Setup(Level.Trial) + public void setup() throws ParseException, IOException { + wktLine = buildLineWKT(Math.max(2, nVerticesPerLine), dim); + wktMulti = buildMultiLineWKT(Math.max(1, nLines), Math.max(2, nVerticesPerLine), dim); + + WKTReader wktReader = new WKTReader(); + geoLine = wktReader.read(wktLine); 
+ geoMulti = wktReader.read(wktMulti); + + boolean isXYZ = "XYZ".equals(dim); + wkbReadSingleLE = buildLineWKB(true, isXYZ, nVerticesPerLine); + wkbReadSingleBE = buildLineWKB(false, isXYZ, nVerticesPerLine); + wkbReadMultiLE = buildMultiLineWKB(true, isXYZ, nLines, nVerticesPerLine); + wkbReadMultiBE = buildMultiLineWKB(false, isXYZ, nLines, nVerticesPerLine); + } + + // ---- WRITE (Geography -> WKB) ---- + @Benchmark + public double wkb_write_line(Blackhole bh, BenchPolylineWKB.ProfilerHook ph) throws IOException { + return write(geoLine, bh); + } + + @Benchmark + public double wkb_write_multiline(Blackhole bh, BenchPolylineWKB.ProfilerHook ph) + throws IOException { + return write(geoMulti, bh); + } + + private double write(Geography g, Blackhole bh) throws IOException { + int outDims = ("XY".equals(dim) ? 2 : 3); + int order = + "LE".equals(endianness) ? ByteOrderValues.LITTLE_ENDIAN : ByteOrderValues.BIG_ENDIAN; + WKBWriter writer = new WKBWriter(outDims, order); + byte[] out = writer.write(g); + long sum = 0; + for (byte b : out) sum += (b & 0xFF); + bh.consume(out); + return (double) sum; + } + + // ---- READ (WKB -> Geography) ---- + @Benchmark + public void wkb_read_line(Blackhole bh, BenchPolylineWKB.ProfilerHook ph) + throws IOException, ParseException { + read(("LE".equals(endianness) ? wkbReadSingleLE : wkbReadSingleBE), bh); + } + + @Benchmark + public void wkb_read_multiline(Blackhole bh, BenchPolylineWKB.ProfilerHook ph) + throws IOException, ParseException { + read(("LE".equals(endianness) ? wkbReadMultiLE : wkbReadMultiBE), bh); + } + + private void read(byte[] src, Blackhole bh) throws IOException, ParseException { + WKBReader reader = new WKBReader(); + Geography g = reader.read(src); + bh.consume(g); + bh.consume(g.numShapes()); + } + + // ---- Hand-built WKB for READ benches ---- + + private static byte[] buildLineWKB(boolean little, boolean xyz, int nVertices) { + int type = xyz ? 1002 : 2; // LineString type + int doubles = xyz ? 
3 : 2; + int len = 1 + 4 + 4 + (8 * doubles * nVertices); // endian, type, count, coords + + ByteBuffer bb = + ByteBuffer.allocate(len).order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + bb.put(little ? (byte) 1 : (byte) 0); + bb.putInt(type); + bb.putInt(nVertices); + for (int i = 0; i < nVertices; i++) { + double[] c = pointCoord(i, xyz); + bb.putDouble(c[0]); + bb.putDouble(c[1]); + if (xyz) bb.putDouble(c[2]); + } + return bb.array(); + } + + private static byte[] buildMultiLineWKB(boolean little, boolean xyz, int nLines, int nVertices) { + int multiType = xyz ? 1005 : 5; // MultiLineString type + int header = 1 + 4 + 4; // endian, type, num_lines + byte[] singleLine = buildLineWKB(little, xyz, nVertices); + int len = header + (nLines * singleLine.length); + + ByteBuffer bb = + ByteBuffer.allocate(len).order(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + bb.put(little ? (byte) 1 : (byte) 0); + bb.putInt(multiType); + bb.putInt(nLines); + for (int i = 0; i < nLines; i++) { + bb.put(singleLine); + } + return bb.array(); + } + + private static double[] pointCoord(int i, boolean xyz) { + double x = 100.0 + i * 0.001; + double y = 50.0 + i * 0.002; + if (!xyz) return new double[] {x, y}; + double z = (i % 17) + 0.375; + return new double[] {x, y, z}; + } + + // ---- Helpers ---- + private static String buildLineWKT(int nVertices, String dim) { + String dimToken = "XYZ".equals(dim) ? " Z" : ""; + String coords = + IntStream.range(0, nVertices) + .mapToObj(i -> coord(i, dim)) + .collect(Collectors.joining(", ")); + return "LINESTRING" + dimToken + " (" + coords + ")"; + } + + private static String buildMultiLineWKT(int numLines, int numVerticesPerLine, String dim) { + String dimToken = "XYZ".equals(dim) ? 
" Z" : ""; + String lines = + IntStream.range(0, numLines) + .mapToObj( + i -> { + String coords = + IntStream.range(0, numVerticesPerLine) + .mapToObj(j -> coord(i * numVerticesPerLine + j, dim)) + .collect(Collectors.joining(", ")); + return "(" + coords + ")"; + }) + .collect(Collectors.joining(", ")); + return "MULTILINESTRING" + dimToken + " (" + lines + ")"; + } + + // emit deterministic coords; add non-zero Z in XYZ + private static String coord(int i, String dim) { + double x = 100.0 + i * 0.001; + double y = 50.0 + i * 0.002; + if ("XY".equals(dim)) { + return String.format(Locale.ROOT, "%.6f %.6f", x, y); + } else { + double z = (i % 17) + 0.375; + return String.format(Locale.ROOT, "%.6f %.6f %.6f", x, y, z); + } + } + + // ===================================================================== + // == Async-profiler hook (runs inside the fork) == + // ===================================================================== + + // -------- Async-profiler hook (runs inside fork) -------- + /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */ + @State(Scope.Benchmark) + public static class ProfilerHook { + @Param({"cpu"}) + public String event; + + @Param({"jfr"}) + public String format; + + @Param({"1ms"}) + public String interval; + + private AsyncProfiler profiler; + private Path outDir; + + @Setup(Level.Trial) + public void trial() throws Exception { + profiler = AsyncProfiler.getInstance(); + outDir = Paths.get("profiles"); + Files.createDirectories(outDir); + } + + @Setup(Level.Iteration) + public void start(BenchmarkParams b, IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event); + File out = + outDir + .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html")) + .toAbsolutePath() + .toFile(); + + // Using 'all-user' helps the profiler find the correct forked JMH process. 
+ // The filter is removed to avoid accidentally hiding the benchmark thread. + String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval); + + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath()); + } else { + profiler.execute("start," + common); + System.setProperty("ap.out", out.getAbsolutePath()); + System.setProperty("ap.format", format); + } + } + + @TearDown(Level.Iteration) + public void stop(IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("stop"); + } else { + String file = System.getProperty("ap.out"); + String fmt = System.getProperty("ap.format", "flamegraph"); + profiler.execute(String.format("stop,file=%s,output=%s", file, fmt)); + } + } + } +} diff --git a/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPoint.java b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPoint.java new file mode 100644 index 0000000000..6d003d23c9 --- /dev/null +++ b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPoint.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.bench; + +import com.esotericsoftware.kryo.io.Output; +import com.esotericsoftware.kryo.io.UnsafeInput; +import com.google.common.geometry.PrimitiveArrays; +import com.google.common.geometry.S2LatLng; +import com.google.common.geometry.S2Point; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import one.profiler.AsyncProfiler; +import org.apache.sedona.common.S2Geography.EncodeOptions; +import org.apache.sedona.common.S2Geography.EncodeTag; +import org.apache.sedona.common.S2Geography.Geography; +import org.apache.sedona.common.S2Geography.PointGeography; +import org.apache.sedona.common.S2Geography.SinglePointGeography; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.BenchmarkParams; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.infra.IterationParams; +import org.openjdk.jmh.runner.IterationType; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(2) +@State(Scope.Thread) +public class DecodeBenchPoint { + + // -------- Params -------- + @Param({"1", "16", "256", "1024", "4096", "65536"}) + public int points; + + @Param({"XY", "XYZ"}) + public String dimension; + + @Param({"COMPACT", "FAST"}) + public String pointEncoding; + + // -------- Reused points -------- + private List pts; + + // -------- Tagged payloads (POINT/MULTIPOINT) -------- + private byte[] taggedPointBytes; + private byte[] taggedMultiPointBytes; + private UnsafeInput taggedPointIn; + private UnsafeInput taggedMultiPointIn; + + // -------- Raw 
coder payloads (using the SAME pts) -------- + private PrimitiveArrays.Bytes rawCompactBytesAdapter; + private PrimitiveArrays.Cursor compactCur; + private PrimitiveArrays.Bytes rawFastBytesAdapter; + private PrimitiveArrays.Cursor fastCur; + + // ---------------- Setup ---------------- + @Setup(Level.Trial) + public void setup() throws Exception { + pts = buildPoints(points); + + // --- Tagged POINT --- + { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new SinglePointGeography(pts.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedPointBytes = baos.toByteArray(); + } + + // --- Tagged MULTIPOINT --- + { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PointGeography(pts); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedMultiPointBytes = baos.toByteArray(); + } + + // --- Raw coder payloads from the SAME points (COMPACT & FAST) --- + byte[] rawCompactBytes = encodePointsPayload(pts, /*compact=*/ true); + byte[] rawFastBytes = encodePointsPayload(pts, /*compact=*/ false); + + // Inputs for tagged payloads + taggedPointIn = new UnsafeInput(taggedPointBytes); + taggedMultiPointIn = new UnsafeInput(taggedMultiPointBytes); + + // Bytes adapters & cursors + rawCompactBytesAdapter = bytesOverArray(rawCompactBytes); + compactCur = rawCompactBytesAdapter.cursor(); + rawFastBytesAdapter = bytesOverArray(rawFastBytes); + fastCur = rawFastBytesAdapter.cursor(); + + System.out.printf( + "points=%d enc=%s tagged[POINT]=%dB tagged[MULTIPOINT]=%dB rawCompact=%dB rawFast=%dB%n", + points, + pointEncoding, + taggedPointBytes.length, + taggedMultiPointBytes.length, + rawCompactBytes.length, + rawFastBytes.length); + } + + @Setup(Level.Invocation) + public void rewind() { + // Rewind Kryo inputs + taggedPointIn.rewind(); + 
taggedMultiPointIn.rewind(); + // Reset S2 cursor positions + compactCur.position = 0; + fastCur.position = 0; + } + + // ===================================================================== + // == Benchmarks (TAGGED) == + // ===================================================================== + + @Benchmark + public void tagged_point_decode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + Geography g = Geography.decodeTagged(taggedPointIn); + bh.consume(g); + } + + @Benchmark + public void tagged_multipoint_decode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + // Note: profiling is handled per-iteration by ProfilerHook; keep the body clean. + Geography g = Geography.decodeTagged(taggedMultiPointIn); + bh.consume(g); + } + + @Benchmark + public double tagged_multipoint_encode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PointGeography(pts); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public double tagged_point_encode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new SinglePointGeography(pts.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public int tagged_multipoint_tagOnly() throws IOException { + // Header + covering + count only (no point decode) + EncodeTag tag = EncodeTag.decode(taggedMultiPointIn); + tag.skipCovering(taggedMultiPointIn); + 
int n = taggedMultiPointIn.readInt(false); // varint length of payload we wrote + return n; + } + + // ===================================================================== + // == Benchmarks (RAW S2 coder) == + // ===================================================================== + + @Benchmark + public double raw_S2points_compact_decode(DecodeBenchPoint.ProfilerHook ph) throws IOException { + List out = S2Point.Shape.COMPACT_CODER.decode(rawCompactBytesAdapter, compactCur); + double acc = 0; + for (int i = 0; i < out.size(); i++) { + S2Point p = out.get(i); + acc += p.getX() + p.getY() + p.getZ(); + } + return acc; // returning prevents DCE + } + + @Benchmark + public double raw_S2points_fast_decode(DecodeBenchPoint.ProfilerHook ph) throws IOException { + List out = S2Point.Shape.FAST_CODER.decode(rawFastBytesAdapter, fastCur); + double acc = 0; + for (int i = 0; i < out.size(); i++) { + S2Point p = out.get(i); + acc += p.getX() + p.getY() + p.getZ(); + } + return acc; // returning prevents DCE + } + + @Benchmark + public double raw_S2points_compact_encode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + S2Point.Shape.COMPACT_CODER.encode(S2Point.Shape.fromList(pts), out); + // Materialize once to make the work observable & defeat DCE + byte[] arr = out.toBytes(); + long s = 0; + for (byte b : arr) s += (b & 0xFF); + bh.consume(arr); + return (double) s; + } + + @Benchmark + public double raw_S2points_fast_encode(DecodeBenchPoint.ProfilerHook ph, Blackhole bh) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + S2Point.Shape.FAST_CODER.encode(S2Point.Shape.fromList(pts), out); + // Materialize once to make the work observable & defeat DCE + byte[] arr = out.toBytes(); + long s = 0; + for (byte b : arr) s += (b & 0xFF); + bh.consume(arr); + return (double) s; + } + + // 
===================================================================== + // == Helpers == + // ===================================================================== + + /** Encode the list of S2Points into the raw S2 payload for COMPACT/FAST coder. */ + static byte[] encodePointsPayload(List points, boolean compact) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + if (compact) { + S2Point.Shape.COMPACT_CODER.encode(S2Point.Shape.fromList(points), out); + } else { + S2Point.Shape.FAST_CODER.encode(S2Point.Shape.fromList(points), out); + } + out.flush(); + return baos.toByteArray(); + } + + private static List buildPoints(int n) { + // Deterministic pseudo-grid; S2Point is inherently 3D, 'dimension' is retained as a param only + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + double lat = -60.0 + (i % 120); + double lng = -170.0 + (i % 340); + out.add(S2LatLng.fromDegrees(lat, lng).toPoint()); + } + return out; + } + + private static void applyPointEncodingPreference(EncodeOptions opts, String enc) { + if ("COMPACT".equals(enc)) { + opts.setCodingHint(EncodeOptions.CodingHint.COMPACT); + opts.setEnableLazyDecode(false); + } else if ("FAST".equals(enc)) { + opts.setCodingHint(EncodeOptions.CodingHint.FAST); + opts.setEnableLazyDecode(true); + } + } + + private static PrimitiveArrays.Bytes bytesOverArray(byte[] arr) { + return bytesOverSlice(arr, 0, arr.length); + } + + private static PrimitiveArrays.Bytes bytesOverSlice(byte[] buf, int off, int len) { + return new PrimitiveArrays.Bytes() { + @Override + public long length() { + return len; + } + + @Override + public byte get(long i) { + return buf[off + (int) i]; + } + }; + } + + // ===================================================================== + // == Async-profiler hook (per-iteration, not inside the benchmark) == + // ===================================================================== + + // -------- Async-profiler hook 
(runs inside fork) -------- + /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */ + @State(Scope.Benchmark) + public static class ProfilerHook { + @Param({"cpu"}) + public String event; + + @Param({"jfr"}) + public String format; + + @Param({"1ms"}) + public String interval; + + private AsyncProfiler profiler; + private Path outDir; + + @Setup(Level.Trial) + public void trial() throws Exception { + profiler = AsyncProfiler.getInstance(); + outDir = Paths.get("profiles"); + Files.createDirectories(outDir); + } + + @Setup(Level.Iteration) + public void start(BenchmarkParams b, IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event); + File out = + outDir + .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html")) + .toAbsolutePath() + .toFile(); + + // Using 'all-user' helps the profiler find the correct forked JMH process. + // The filter is removed to avoid accidentally hiding the benchmark thread. 
+ String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval); + + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath()); + } else { + profiler.execute("start," + common); + System.setProperty("ap.out", out.getAbsolutePath()); + System.setProperty("ap.format", format); + } + } + + @TearDown(Level.Iteration) + public void stop(IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("stop"); + } else { + String file = System.getProperty("ap.out"); + String fmt = System.getProperty("ap.format", "flamegraph"); + profiler.execute(String.format("stop,file=%s,output=%s", file, fmt)); + } + } + } +} diff --git a/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolygon.java b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolygon.java new file mode 100644 index 0000000000..ddad731680 --- /dev/null +++ b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolygon.java @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.bench; + +import com.esotericsoftware.kryo.io.Output; +import com.esotericsoftware.kryo.io.UnsafeInput; +import com.google.common.geometry.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import one.profiler.AsyncProfiler; +import org.apache.sedona.common.S2Geography.*; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.BenchmarkParams; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.infra.IterationParams; +import org.openjdk.jmh.runner.IterationType; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@State(Scope.Thread) +public class DecodeBenchPolygon { + + // -------- Params -------- + @Param({"1", "16", "256", "1024"}) + public int numPolygons; + + @Param({"4", "16", "256", "1024"}) + public int verticesPerPolygon; + + @Param({"XY", "XYZ"}) + public String dimension; + + @Param({"COMPACT"}) + public String pointEncoding; + + // -------- Reused geometries -------- + private List polygons; + + // -------- Tagged payloads (POLYGON/MULTIPOLYGON) -------- + private byte[] taggedPolygonBytes; + private byte[] taggedMultiPolygonBytes; + private UnsafeInput taggedPolygonIn; + private UnsafeInput taggedMultiPolygonIn; + + // -------- Raw coder payloads (using the SAME polygons) -------- + private byte[] rawMultiPolygonBytes; + private UnsafeInput rawMultiPolygonIn; + + // ---------------- Setup ---------------- + @Setup(Level.Trial) + public void setup() throws Exception { + polygons = buildPolygons(numPolygons, verticesPerPolygon); + + // --- Tagged POLYGON --- + { + EncodeOptions opts = new EncodeOptions(); + 
applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PolygonGeography(polygons.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedPolygonBytes = baos.toByteArray(); + } + + // --- Tagged MULTIPOLYGON --- + { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new MultiPolygonGeography(Geography.GeographyKind.MULTIPOLYGON, polygons); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedMultiPolygonBytes = baos.toByteArray(); + } + + // --- Raw S2 coder payload from the SAME polygons --- + rawMultiPolygonBytes = encodeMultiPolygonPayload(polygons); + + // Inputs for tagged payloads + taggedPolygonIn = new UnsafeInput(taggedPolygonBytes); + taggedMultiPolygonIn = new UnsafeInput(taggedMultiPolygonBytes); + + // Input for raw payload + rawMultiPolygonIn = new UnsafeInput(rawMultiPolygonBytes); + + System.out.printf( + "numPolygons=%d, verticesPerPolygon=%d, enc=%s, tagged[POLYGON]=%dB, tagged[MULTIPOLYGON]=%dB, rawS2=%dB%n", + numPolygons, + verticesPerPolygon, + pointEncoding, + taggedPolygonBytes.length, + taggedMultiPolygonBytes.length, + rawMultiPolygonBytes.length); + } + + @Setup(Level.Invocation) + public void rewind() { + // Rewind Kryo inputs + taggedPolygonIn.rewind(); + taggedMultiPolygonIn.rewind(); + rawMultiPolygonIn.rewind(); + } + + // ===================================================================== + // == Benchmarks (TAGGED) == + // ===================================================================== + + @Benchmark + public void tagged_polygon_full(DecodeBenchPolygon.ProfilerHook ph, Blackhole bh) + throws IOException { + Geography g = Geography.decodeTagged(taggedPolygonIn); + bh.consume(g); + } + + @Benchmark + public void tagged_multipolygon_decode(DecodeBenchPolygon.ProfilerHook ph, Blackhole bh) + throws IOException { + Geography g = 
Geography.decodeTagged(taggedMultiPolygonIn); + bh.consume(g); + } + + @Benchmark + public double tagged_polygon_encode(DecodeBenchPolygon.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PolygonGeography(polygons.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public double tagged_multipolygon_encode(DecodeBenchPolygon.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new MultiPolygonGeography(Geography.GeographyKind.MULTIPOLYGON, polygons); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public int tagged_multipolygon_tagOnly() throws IOException { + // Header + covering + count only (no geometry decode) + EncodeTag tag = EncodeTag.decode(taggedMultiPolygonIn); + tag.skipCovering(taggedMultiPolygonIn); + int n = taggedMultiPolygonIn.readInt(false); // varint length of payload we wrote + return n; + } + + // ===================================================================== + // == Benchmarks (RAW S2 coder) == + // ===================================================================== + + @Benchmark + public double raw_S2multipolygon_decode(DecodeBenchPolygon.ProfilerHook ph) throws IOException { + int b0 = rawMultiPolygonIn.read(); + int b1 = rawMultiPolygonIn.read(); + int b2 = rawMultiPolygonIn.read(); + int b3 = rawMultiPolygonIn.read(); + int count = (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24); + + List 
out = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + S2Polygon p = S2Polygon.decode(rawMultiPolygonIn); + out.add(p); + } + double acc = 0; + // Consume the data to prevent Dead Code Elimination (DCE) + for (S2Polygon polygon : out) { + acc += polygon.numLoops(); + } + return acc; + } + + @Benchmark + public double raw_S2polygon_compact_encode(DecodeBenchPolygon.ProfilerHook ph, Blackhole bh) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + // LITTLE-ENDIAN count: + int n = polygons.size(); + out.writeByte(n & 0xFF); + out.writeByte((n >>> 8) & 0xFF); + out.writeByte((n >>> 16) & 0xFF); + out.writeByte((n >>> 24) & 0xFF); + + for (S2Polygon polygon : polygons) { + // Use the high-level API: writes the coder id + payload that S2Polyline.decode expects + S2Polygon.COMPACT_CODER.encode(polygon, out); + } + out.flush(); + // Materialize once to make the work observable & defeat DCE + byte[] arr = out.toBytes(); + long s = 0; + for (byte b : arr) s += (b & 0xFF); + bh.consume(arr); + return (double) s; + } + + // ===================================================================== + // == Helpers == + // ===================================================================== + + /** + * Encode the list of S2Polygons into a raw S2 payload. The format is [count (varint)] + * [polygon_1_bytes] [polygon_2_bytes] ... + */ + static byte[] encodeMultiPolygonPayload(List polygons) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + // LITTLE-ENDIAN count: + int n = polygons.size(); + out.writeByte(n & 0xFF); + out.writeByte((n >>> 8) & 0xFF); + out.writeByte((n >>> 16) & 0xFF); + out.writeByte((n >>> 24) & 0xFF); + + for (S2Polygon polygon : polygons) { + polygon.encode(out); + } + out.flush(); + return baos.toByteArray(); + } + + /** Builds a list of simple, non-overlapping S2Polygons. 
*/ + public static List buildPolygons(int numPolygons, int verticesPerPolygon) { + List result = new ArrayList<>(numPolygons); + double radiusDegrees = 0.1; // Small radius for each polygon's vertices from its center + for (int j = 0; j < numPolygons; j++) { + // Shift each polygon center to avoid major overlaps + double centerLat = -60.0 + j * 0.5; + double centerLng = -170.0 + j * 0.5; + S2Point center = S2LatLng.fromDegrees(centerLat, centerLng).toPoint(); + + List vertices = new ArrayList<>(); + for (int i = 0; i < verticesPerPolygon; i++) { + double angle = 2 * Math.PI * i / verticesPerPolygon; + // Create points in a circle around the center point. Note: This is an approximation on a + // sphere. + double lat = centerLat + radiusDegrees * Math.cos(angle); + double lng = centerLng + radiusDegrees * Math.sin(angle); + vertices.add(S2LatLng.fromDegrees(lat, lng).toPoint()); + } + S2Loop loop = new S2Loop(vertices); + // Ensure the loop has the correct orientation (counter-clockwise for shells) + loop.normalize(); + result.add(new S2Polygon(loop)); + } + return result; + } + + private static void applyPointEncodingPreference(EncodeOptions opts, String enc) { + if ("COMPACT".equals(enc)) { + opts.setEnableLazyDecode(false); + opts.setCodingHint(EncodeOptions.CodingHint.COMPACT); + } else { + opts.setEnableLazyDecode(true); + } + } + + // ===================================================================== + // == Async-profiler hook (per-iteration, not inside the benchmark) == + // ===================================================================== + + // -------- Async-profiler hook (runs inside fork) -------- + /** Per-iteration profiler: start on measurement iterations, stop after each iteration. 
*/ + @State(Scope.Benchmark) + public static class ProfilerHook { + @Param({"cpu"}) + public String event; + + @Param({"jfr"}) + public String format; + + @Param({"1ms"}) + public String interval; + + private AsyncProfiler profiler; + private Path outDir; + + @Setup(Level.Trial) + public void trial() throws Exception { + profiler = AsyncProfiler.getInstance(); + outDir = Paths.get("profiles"); + Files.createDirectories(outDir); + } + + @Setup(Level.Iteration) + public void start(BenchmarkParams b, IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event); + File out = + outDir + .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html")) + .toAbsolutePath() + .toFile(); + + // Using 'all-user' helps the profiler find the correct forked JMH process. + // The filter is removed to avoid accidentally hiding the benchmark thread. + String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval); + + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath()); + } else { + profiler.execute("start," + common); + System.setProperty("ap.out", out.getAbsolutePath()); + System.setProperty("ap.format", format); + } + } + + @TearDown(Level.Iteration) + public void stop(IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("stop"); + } else { + String file = System.getProperty("ap.out"); + String fmt = System.getProperty("ap.format", "flamegraph"); + profiler.execute(String.format("stop,file=%s,output=%s", file, fmt)); + } + } + } +} diff --git a/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolyline.java b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolyline.java new file mode 100644 index 0000000000..9ae1ef70d6 --- /dev/null +++ 
b/geographyBench/src/jmh/java/org/apache/sedona/bench/DecodeBenchPolyline.java @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.bench; + +import com.esotericsoftware.kryo.io.Output; +import com.esotericsoftware.kryo.io.UnsafeInput; +import com.google.common.geometry.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import one.profiler.AsyncProfiler; +import org.apache.sedona.common.S2Geography.*; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.BenchmarkParams; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.infra.IterationParams; +import org.openjdk.jmh.runner.IterationType; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@State(Scope.Thread) +public class DecodeBenchPolyline { + + // -------- Params -------- + @Param({"1", "16", "256", 
"1024"}) + public int numPolylines; + + @Param({"2", "16", "256", "1024"}) + public int verticesPerPolyline; + + @Param({"XY", "XYZ"}) + public String dimension; + + @Param({"COMPACT"}) + public String pointEncoding; + + // -------- Reused geometries -------- + private List polylines; + + // -------- Tagged payloads (POLYLINE/MULTIPOLYLINE) -------- + private byte[] taggedPolylineBytes; + private byte[] taggedMultiPolylineBytes; + private UnsafeInput taggedPolylineIn; + private UnsafeInput taggedMultiPolylineIn; + + // -------- Raw coder payloads (using the SAME polylines) -------- + private byte[] rawMultiPolylineBytes; + private UnsafeInput rawMultiPolylineIn; + + // ---------------- Setup ---------------- + @Setup(Level.Trial) + public void setup() throws Exception { + polylines = buildPolylines(numPolylines, verticesPerPolyline); + + // --- Tagged POLYLINE --- + { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new SinglePolylineGeography(polylines.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedPolylineBytes = baos.toByteArray(); + } + + // --- Tagged MULTIPOLYLINE --- + { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PolylineGeography(polylines); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + taggedMultiPolylineBytes = baos.toByteArray(); + } + + // --- Raw S2 coder payload from the SAME polylines --- + rawMultiPolylineBytes = encodeMultiPolylinePayload(polylines); + + // Inputs for tagged payloads + taggedPolylineIn = new UnsafeInput(taggedPolylineBytes); + taggedMultiPolylineIn = new UnsafeInput(taggedMultiPolylineBytes); + + // Input for raw payload + rawMultiPolylineIn = new UnsafeInput(rawMultiPolylineBytes); + + System.out.printf( + "numPolylines=%d, verticesPerPolyline=%d, enc=%s, tagged[POLYLINE]=%dB, 
tagged[MULTIPOLYLINE]=%dB, rawS2=%dB%n", + numPolylines, + verticesPerPolyline, + pointEncoding, + taggedPolylineBytes.length, + taggedMultiPolylineBytes.length, + rawMultiPolylineBytes.length); + } + + @Setup(Level.Invocation) + public void rewind() { + // Rewind Kryo inputs + taggedPolylineIn.rewind(); + taggedMultiPolylineIn.rewind(); + rawMultiPolylineIn.rewind(); + } + + // ===================================================================== + // == Benchmarks (TAGGED) == + // ===================================================================== + + @Benchmark + public void tagged_polyline_decode(DecodeBenchPolyline.ProfilerHook ph, Blackhole bh) + throws IOException { + Geography g = Geography.decodeTagged(taggedPolylineIn); + bh.consume(g); + } + + @Benchmark + public void tagged_multipolyline_decode(DecodeBenchPolyline.ProfilerHook ph, Blackhole bh) + throws IOException { + // Note: profiling is handled per-iteration by ProfilerHook; keep the body clean. + Geography g = Geography.decodeTagged(taggedMultiPolylineIn); + bh.consume(g); + } + + @Benchmark + public double tagged_polyline_encode(DecodeBenchPolyline.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PolylineGeography(polylines.get(0)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public double tagged_multipolyline_encode(DecodeBenchPolyline.ProfilerHook ph, Blackhole bh) + throws IOException { + EncodeOptions opts = new EncodeOptions(); + applyPointEncodingPreference(opts, pointEncoding); + Geography g = new PolylineGeography(polylines); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + g.encodeTagged(baos, opts); + byte[] data = baos.toByteArray(); + long s = 0; + for (byte 
b : data) s += (b & 0xFF); + bh.consume(data); + return (double) s; + } + + @Benchmark + public int tagged_multipolyline_tagOnly() throws IOException { + // Header + covering + count only (no geometry decode) + EncodeTag tag = EncodeTag.decode(taggedMultiPolylineIn); + tag.skipCovering(taggedMultiPolylineIn); + int n = taggedMultiPolylineIn.readInt(false); // varint length of payload we wrote + return n; + } + + // ===================================================================== + // == Benchmarks (RAW S2 coder) == + // ===================================================================== + + @Benchmark + public double raw_S2multipolyline_decode(DecodeBenchPolyline.ProfilerHook ph) throws IOException { + int b0 = rawMultiPolylineIn.read(); + int b1 = rawMultiPolylineIn.read(); + int b2 = rawMultiPolylineIn.read(); + int b3 = rawMultiPolylineIn.read(); + int count = (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24); + + List out = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + S2Polyline p = S2Polyline.decode(rawMultiPolylineIn); + out.add(p); + } + double acc = 0; + // Consume the data to prevent Dead Code Elimination (DCE) + for (S2Polyline polyline : out) { + if (polyline.numVertices() > 0) { + S2Point p = polyline.vertex(0); + acc += p.getX() + p.getY() + p.getZ(); + } + } + return acc; + } + + @Benchmark + public double raw_S2polyline_compact_encode(DecodeBenchPolyline.ProfilerHook ph, Blackhole bh) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + // LITTLE-ENDIAN count: + int n = polylines.size(); + out.writeByte(n & 0xFF); + out.writeByte((n >>> 8) & 0xFF); + out.writeByte((n >>> 16) & 0xFF); + out.writeByte((n >>> 24) & 0xFF); + + for (S2Polyline polyline : polylines) { + // Use the high-level API: writes the coder id + payload that S2Polyline.decode expects + S2Polyline.COMPACT_CODER.encode(polyline, out); + } + out.flush(); + // Materialize 
once to make the work observable & defeat DCE + byte[] arr = out.toBytes(); + long s = 0; + for (byte b : arr) s += (b & 0xFF); + bh.consume(arr); + return (double) s; + } + + // ===================================================================== + // == Helpers == + // ===================================================================== + + /** + * Encode the list of S2Polylines into a raw S2 payload. The format is [count (varint)] + * [polyline_1_bytes] [polyline_2_bytes] ... + */ + static byte[] encodeMultiPolylinePayload(List polylines) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output out = new Output(baos); + + // Write count as a fixed 4-byte int (match reader) + // LITTLE-ENDIAN count: + int n = polylines.size(); + out.writeByte(n & 0xFF); + out.writeByte((n >>> 8) & 0xFF); + out.writeByte((n >>> 16) & 0xFF); + out.writeByte((n >>> 24) & 0xFF); + + for (S2Polyline polyline : polylines) { + // Use the high-level API: writes the coder id + payload that S2Polyline.decode expects + S2Polyline.COMPACT_CODER.encode(polyline, out); + } + out.flush(); + return baos.toByteArray(); + } + + public static List buildPolylines(int numPolylines, int verticesPerPolyline) { + List result = new ArrayList<>(numPolylines); + for (int j = 0; j < numPolylines; j++) { + List vertices = new ArrayList<>(verticesPerPolyline); + for (int i = 0; i < verticesPerPolyline; i++) { + // Shift each polyline so they don’t overlap completely + double latDeg = -60.0 + (i % 120) + j * 0.5; + double lngDeg = -170.0 + (i % 340) + j * 0.5; + S2LatLng ll = S2LatLng.fromDegrees(latDeg, lngDeg).normalized(); + vertices.add(ll.toPoint()); + } + result.add(new S2Polyline(vertices)); + } + return result; + } + + private static void applyPointEncodingPreference(EncodeOptions opts, String enc) { + // Adjust to your real EncodeOptions API; placeholder keeps parity with your earlier code. + // If you have an explicit "use compact vs fast" switch, set it here. 
+ if ("COMPACT".equals(enc)) { + opts.setEnableLazyDecode(false); + opts.setCodingHint(EncodeOptions.CodingHint.COMPACT); + } else { + opts.setEnableLazyDecode(true); + } + } + + // ===================================================================== + // == Async-profiler hook (per-iteration, not inside the benchmark) == + // ===================================================================== + + // -------- Async-profiler hook (runs inside fork) -------- + /** Per-iteration profiler: start on measurement iterations, stop after each iteration. */ + @State(Scope.Benchmark) + public static class ProfilerHook { + @Param({"cpu"}) + public String event; + + @Param({"jfr"}) + public String format; + + @Param({"1ms"}) + public String interval; + + private AsyncProfiler profiler; + private Path outDir; + + @Setup(Level.Trial) + public void trial() throws Exception { + profiler = AsyncProfiler.getInstance(); + outDir = Paths.get("profiles"); + Files.createDirectories(outDir); + } + + @Setup(Level.Iteration) + public void start(BenchmarkParams b, IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + String base = String.format("%s-iter%02d-%s", b.getBenchmark(), it.getCount(), event); + File out = + outDir + .resolve(base + (format.equalsIgnoreCase("jfr") ? ".jfr" : ".html")) + .toAbsolutePath() + .toFile(); + + // Using 'all-user' helps the profiler find the correct forked JMH process. + // The filter is removed to avoid accidentally hiding the benchmark thread. 
+ String common = String.format("event=%s,interval=%s,cstack=fp,threads", event, interval); + + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("start," + common + ",jfr,file=" + out.getAbsolutePath()); + } else { + profiler.execute("start," + common); + System.setProperty("ap.out", out.getAbsolutePath()); + System.setProperty("ap.format", format); + } + } + + @TearDown(Level.Iteration) + public void stop(IterationParams it) throws Exception { + if (it.getType() != IterationType.MEASUREMENT) return; + if ("jfr".equalsIgnoreCase(format)) { + profiler.execute("stop"); + } else { + String file = System.getProperty("ap.out"); + String fmt = System.getProperty("ap.format", "flamegraph"); + profiler.execute(String.format("stop,file=%s,output=%s", file, fmt)); + } + } + } +} diff --git a/pom.xml b/pom.xml index bd6d0c0bc4..54230a398c 100644 --- a/pom.xml +++ b/pom.xml @@ -778,6 +778,7 @@ spark spark-shaded shade-proto + geographyBench @@ -802,6 +803,7 @@ flink-shaded snowflake shade-proto + geographyBench