Skip to content

Commit b73526b

Browse files
authored
Add micro benchmarks for unified query layer (#5043) (#5062)
* Add microbenchmark for unified query APIs * Add benchmark for unified function * Refactor javadoc and helper method * Fix spotless check failure * Address CodeRabit comment --------- (cherry picked from commit cee7f6b) Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent ef03b2e commit b73526b

5 files changed

Lines changed: 242 additions & 2 deletions

File tree

api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java renamed to api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java

File renamed without changes.

benchmarks/README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,18 @@ The microbenchmark suite is also handy for ad-hoc microbenchmarks but please rem
1010

1111
## Getting Started
1212

13-
Just run `./gradlew :benchmarks:jmh` from the project root directory or run specific benchmark via your IDE. It will build all microbenchmarks, execute them and print the result.
13+
Run all benchmarks from the project root directory:
14+
15+
```bash
16+
./gradlew :benchmarks:jmh
17+
```
18+
19+
Run specific benchmarks using the `-Pjmh.includes` parameter:
20+
21+
```bash
22+
./gradlew :benchmarks:jmh -Pjmh.includes='UnifiedQueryBenchmark'
23+
./gradlew :benchmarks:jmh -Pjmh.includes='UnifiedQueryBenchmark.plan.*'
24+
```
1425

1526
## Adding Microbenchmarks
1627

benchmarks/build.gradle

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
plugins {
77
id 'java-library'
8+
id "io.freefair.lombok"
89
id "me.champeau.jmh" version "0.6.8"
910
}
1011

@@ -15,6 +16,8 @@ repositories {
1516
dependencies {
1617
implementation project(':core')
1718
implementation project(':opensearch')
19+
implementation project(':api')
20+
jmhImplementation testFixtures(project(':api'))
1821

1922
// Dependencies required by JMH micro benchmark
2023
api group: 'org.openjdk.jmh', name: 'jmh-core', version: '1.36'
@@ -30,4 +33,9 @@ spotless {
3033
}
3134
}
3235

33-
compileJava.options.compilerArgs.addAll(["-processor", "org.openjdk.jmh.generators.BenchmarkProcessor"])
36+
// JMH configuration passed via command line
37+
jmh {
38+
includes = project.hasProperty('jmh.includes') ? [project.property('jmh.includes')] : []
39+
}
40+
41+
compileJava.options.compilerArgs.addAll(["-processor", "org.openjdk.jmh.generators.BenchmarkProcessor"])
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api;
7+
8+
import java.util.Arrays;
9+
import java.util.List;
10+
import java.util.concurrent.TimeUnit;
11+
import lombok.Getter;
12+
import lombok.RequiredArgsConstructor;
13+
import org.openjdk.jmh.annotations.Benchmark;
14+
import org.openjdk.jmh.annotations.BenchmarkMode;
15+
import org.openjdk.jmh.annotations.Fork;
16+
import org.openjdk.jmh.annotations.Level;
17+
import org.openjdk.jmh.annotations.Measurement;
18+
import org.openjdk.jmh.annotations.Mode;
19+
import org.openjdk.jmh.annotations.OperationsPerInvocation;
20+
import org.openjdk.jmh.annotations.OutputTimeUnit;
21+
import org.openjdk.jmh.annotations.Param;
22+
import org.openjdk.jmh.annotations.Scope;
23+
import org.openjdk.jmh.annotations.Setup;
24+
import org.openjdk.jmh.annotations.State;
25+
import org.openjdk.jmh.annotations.TearDown;
26+
import org.openjdk.jmh.annotations.Warmup;
27+
import org.openjdk.jmh.infra.Blackhole;
28+
import org.opensearch.sql.api.function.UnifiedFunction;
29+
import org.opensearch.sql.api.function.UnifiedFunctionRepository;
30+
31+
/**
32+
* JMH benchmark for measuring {@link UnifiedFunction} performance. Tests one representative
33+
* function per PPL category (json, math, conditional, collection, string).
34+
*
35+
* <p>Benchmarks:
36+
*
37+
* <ul>
38+
* <li>{@link #loadFunction()}: Measures function loading from repository
39+
* <li>{@link #evalFunction(Blackhole)}: Measures function evaluation (1000 calls per invocation)
40+
* </ul>
41+
*/
42+
@Warmup(iterations = 2, time = 1)
43+
@Measurement(iterations = 5, time = 1)
44+
@BenchmarkMode(Mode.AverageTime)
45+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
46+
@State(Scope.Thread)
47+
@Fork(value = 1)
48+
public class UnifiedFunctionBenchmark extends UnifiedQueryTestBase {
49+
50+
/** Number of function evaluations per benchmark invocation. */
51+
private static final int OPS = 1000;
52+
53+
/** Benchmark specification selecting which function to test. */
54+
@Param public BenchmarkSpec benchmarkSpec;
55+
56+
/** Repository for loading unified functions. */
57+
private UnifiedFunctionRepository repository;
58+
59+
/** Pre-loaded function instance for evaluation benchmarks. */
60+
private UnifiedFunction function;
61+
62+
/** Input arguments for function evaluation. */
63+
private List<Object> inputs;
64+
65+
@Setup(Level.Trial)
66+
public void setUpBenchmark() {
67+
super.setUp();
68+
69+
repository = new UnifiedFunctionRepository(context);
70+
function = benchmarkSpec.loadFunction(repository);
71+
inputs = benchmarkSpec.getInputs();
72+
}
73+
74+
@TearDown(Level.Trial)
75+
public void tearDownBenchmark() throws Exception {
76+
super.tearDown();
77+
}
78+
79+
/** Benchmarks function loading from repository. */
80+
@Benchmark
81+
public UnifiedFunction loadFunction() {
82+
return benchmarkSpec.loadFunction(repository);
83+
}
84+
85+
/** Benchmarks function evaluation with pre-loaded function and inputs. */
86+
@Benchmark
87+
@OperationsPerInvocation(OPS)
88+
public void evalFunction(Blackhole bh) {
89+
for (int i = 0; i < OPS; i++) {
90+
bh.consume(function.eval(inputs));
91+
}
92+
}
93+
94+
/** Enum defining benchmark test cases - one representative function per PPL category. */
95+
@RequiredArgsConstructor
96+
public enum BenchmarkSpec {
97+
JSON_EXTRACT(
98+
inputTypes("VARCHAR", "VARCHAR"),
99+
sampleInputs("{\"name\":\"test\",\"value\":42}", "$.name")),
100+
COALESCE(
101+
inputTypes("VARCHAR", "VARCHAR", "VARCHAR"),
102+
sampleInputs(null, "first_value", "default_value")),
103+
MVFIND(
104+
inputTypes("ARRAY", "VARCHAR"),
105+
sampleInputs(List.of("debug", "error", "warn", "info"), "err.*")),
106+
REX_EXTRACT(
107+
inputTypes("VARCHAR", "VARCHAR", "INTEGER"),
108+
sampleInputs("192.168.1.1 - GET /api", "(\\d+)", 1));
109+
110+
private final List<String> inputTypes;
111+
@Getter private final List<Object> inputs;
112+
113+
UnifiedFunction loadFunction(UnifiedFunctionRepository repository) {
114+
return repository
115+
.loadFunction(name())
116+
.map(desc -> desc.getBuilder().build(inputTypes))
117+
.orElseThrow();
118+
}
119+
120+
private static List<String> inputTypes(String... types) {
121+
return List.of(types);
122+
}
123+
124+
private static List<Object> sampleInputs(Object... args) {
125+
return Arrays.asList(args);
126+
}
127+
}
128+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api;
7+
8+
import java.sql.PreparedStatement;
9+
import java.util.concurrent.TimeUnit;
10+
import org.apache.calcite.rel.RelNode;
11+
import org.apache.calcite.sql.dialect.SparkSqlDialect;
12+
import org.openjdk.jmh.annotations.Benchmark;
13+
import org.openjdk.jmh.annotations.BenchmarkMode;
14+
import org.openjdk.jmh.annotations.Fork;
15+
import org.openjdk.jmh.annotations.Level;
16+
import org.openjdk.jmh.annotations.Measurement;
17+
import org.openjdk.jmh.annotations.Mode;
18+
import org.openjdk.jmh.annotations.OutputTimeUnit;
19+
import org.openjdk.jmh.annotations.Param;
20+
import org.openjdk.jmh.annotations.Scope;
21+
import org.openjdk.jmh.annotations.Setup;
22+
import org.openjdk.jmh.annotations.State;
23+
import org.openjdk.jmh.annotations.TearDown;
24+
import org.openjdk.jmh.annotations.Warmup;
25+
import org.opensearch.sql.api.compiler.UnifiedQueryCompiler;
26+
import org.opensearch.sql.api.transpiler.UnifiedQueryTranspiler;
27+
28+
/**
29+
* JMH benchmark for measuring the overhead of unified query API components when processing queries.
30+
* This provides baseline metrics and guidance for API consumers during integration.
31+
*/
32+
@Warmup(iterations = 2, time = 1)
33+
@Measurement(iterations = 5, time = 1)
34+
@BenchmarkMode(Mode.AverageTime)
35+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
36+
@State(Scope.Thread)
37+
@Fork(value = 1)
38+
public class UnifiedQueryBenchmark extends UnifiedQueryTestBase {
39+
40+
/** Common query patterns for benchmarking. */
41+
@Param({
42+
"source = catalog.employees",
43+
"source = catalog.employees | where age > 30",
44+
"source = catalog.employees | stats count() by department",
45+
"source = catalog.employees | sort - age",
46+
"source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department"
47+
})
48+
private String query;
49+
50+
/** Transpiler for converting logical plans to SQL strings. */
51+
private UnifiedQueryTranspiler transpiler;
52+
53+
/** Compiler for converting logical plans to executable statements. */
54+
private UnifiedQueryCompiler compiler;
55+
56+
@Setup(Level.Trial)
57+
public void setUpBenchmark() {
58+
super.setUp();
59+
transpiler = UnifiedQueryTranspiler.builder().dialect(SparkSqlDialect.DEFAULT).build();
60+
compiler = new UnifiedQueryCompiler(context);
61+
}
62+
63+
@TearDown(Level.Trial)
64+
public void tearDownBenchmark() throws Exception {
65+
super.tearDown();
66+
}
67+
68+
/** Benchmarks query parsing and Calcite logical plan generation. */
69+
@Benchmark
70+
public RelNode planQuery() {
71+
return planner.plan(query);
72+
}
73+
74+
/** Benchmarks the full transpilation pipeline: Query → logical plan → SQL string. */
75+
@Benchmark
76+
public String transpileToSql() {
77+
RelNode plan = planner.plan(query);
78+
return transpiler.toSql(plan);
79+
}
80+
81+
/**
82+
* Benchmarks the compilation pipeline: Query → logical plan → executable statement. The result
83+
* includes both compile and close time; close overhead is negligible and avoids resource leaking
84+
* during benchmark runs.
85+
*/
86+
@Benchmark
87+
public void compileQuery() throws Exception {
88+
RelNode plan = planner.plan(query);
89+
try (PreparedStatement stmt = compiler.compile(plan)) {
90+
// Statement is auto-closed after benchmark iteration
91+
}
92+
}
93+
}

0 commit comments

Comments
 (0)