From 96fec3e310791d28877018b32f2ab3a0af0e271f Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 7 May 2026 11:24:58 -0700 Subject: [PATCH] Carry CalciteFieldFormatCommandIT through the helper-managed index path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same shape as #5407 for CalciteEvalCommandIT. The IT's init() previously created the in-test test_eval index via direct `PUT /test_eval/_doc/N` requests, relying on dynamic mapping. Two problems: 1. The doc PUTs auto-create the index with whatever settings the cluster defaults to. The analytics-engine compatibility path (force-routing on; tests.analytics.parquet_indices=true) needs parquet-backed indices, which TestUtils.createIndexByRestClient applies via TestUtils.makeParquetBacked when the system property is set. Direct PUTs sidestep that helper, so test_eval lands as Lucene-backed and the analytics planner rejects it with "No backend can scan all requested fields on index [test_eval]". All four working tests fail at execution. 2. init() runs before every @Test method. The doc PUTs are doc-level idempotent, so re-running was wasteful but not failing. Once we switch to createIndexByRestClient, the index-level PUT is no longer idempotent and re-running throws "resource_already_exists_exception". Both addressed in one change: - test_eval is created via TestUtils.createIndexByRestClient with an explicit mapping (name/title=keyword, age=long). The helper honours tests.analytics.parquet_indices=true and produces a parquet-backed index for the analytics-engine sweep; on the v2 path the helper is a no-op around the index PUT, so behaviour is unchanged. - The whole init body is guarded by TestUtils.isIndexExist — same idempotency idiom that loadIndex uses for predefined fixtures. First @Test method provisions; subsequent methods skip. Also pins the projection order on testFieldFormatStringConcatenation. The original query (`source=test_eval | fieldformat greeting = 'Hello ' + name`) had no `| fields` clause and relied on the implicit projection's column order — v2 returns Lucene-source insertion order, analytics returns parquet-storage order (alphabetical), so the assertion only matched on v2 by coincidence. Adding `| fields name, title, age, greeting` makes the assertion deterministic across paths; the existing expected rows (`rows("Alice", "Engineer", 25, "Hello Alice")`) already match this order, so v2 behaviour is preserved. The other four tests already had explicit `| fields ...` clauses, so no change there. No semantic change for the v2 path: the explicit mapping types (keyword, long, keyword) resolve to the same PPL types ("string", "bigint", "string") that dynamic mapping inferred, and fieldformat reads from _source either way. Analytics-route compatibility goes from 1/5 to 4/5 (verified locally against a runTask cluster with analytics-engine + opensearch-sql-plugin). The remaining `testFieldFormatStringConcatenationWithNullFieldToString` needs a `tostring()` UDF on the analytics path — a multi-mode UDF (binary / hex / commas / duration) tracked separately as out of scope. Test plan: - ./gradlew :integ-test:integTest --tests 'org.opensearch.sql.calcite.remote.CalciteFieldFormatCommandIT' -> 5/5 green (v2 path, no regression). - ./gradlew :integ-test:analyticsCompatibilityTest --tests 'org.opensearch.sql.calcite.remote.CalciteFieldFormatCommandIT' -> 4/5 pass; the 5th fails on `tostring`'s missing capability registration, which is the documented out-of-scope category. Signed-off-by: Kai Huang --- .../remote/CalciteFieldFormatCommandIT.java | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java index 86f87c90c81..24c7e504224 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java @@ -12,6 +12,7 @@ import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; +import org.opensearch.sql.legacy.TestUtils; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteFieldFormatCommandIT extends PPLIntegTestCase { @@ -23,26 +24,48 @@ public void init() throws Exception { loadIndex(Index.BANK); - // Create test data for string concatenation - Request request1 = new Request("PUT", "/test_eval/_doc/1?refresh=true"); - request1.setJsonEntity("{\"name\": \"Alice\", \"age\": 25, \"title\": \"Engineer\"}"); - client().performRequest(request1); + // Pre-create test_eval through the helper so the analytics-engine compatibility run + // (tests.analytics.parquet_indices=true) provisions it as a parquet-backed composite + // index. Plain auto-mapping via the doc PUTs would create a Lucene-backed index, which + // the analytics-engine planner cannot scan ("No backend can scan all requested fields"). + // Explicit mapping pins types so both v2 (verifySchema "string"/"bigint") and analytics + // paths see the same shape regardless of dynamic-mapping behavior on the parquet engine. + // Guarded by isIndexExist for idempotency — init() runs before each @Test method. + if (!TestUtils.isIndexExist(client(), "test_eval")) { + String testEvalMapping = + "{\"mappings\":{\"properties\":{" + + "\"name\":{\"type\":\"keyword\"}," + + "\"age\":{\"type\":\"long\"}," + + "\"title\":{\"type\":\"keyword\"}}}}"; + TestUtils.createIndexByRestClient(client(), "test_eval", testEvalMapping); - Request request2 = new Request("PUT", "/test_eval/_doc/2?refresh=true"); - request2.setJsonEntity("{\"name\": \"Bob\", \"age\": 30, \"title\": \"Manager\"}"); - client().performRequest(request2); + // Create test data for string concatenation + Request request1 = new Request("PUT", "/test_eval/_doc/1?refresh=true"); + request1.setJsonEntity("{\"name\": \"Alice\", \"age\": 25, \"title\": \"Engineer\"}"); + client().performRequest(request1); - Request request3 = new Request("PUT", "/test_eval/_doc/3?refresh=true"); - request3.setJsonEntity("{\"name\": \"Charlie\", \"age\": null, \"title\": \"Analyst\"}"); - client().performRequest(request3); + Request request2 = new Request("PUT", "/test_eval/_doc/2?refresh=true"); + request2.setJsonEntity("{\"name\": \"Bob\", \"age\": 30, \"title\": \"Manager\"}"); + client().performRequest(request2); + + Request request3 = new Request("PUT", "/test_eval/_doc/3?refresh=true"); + request3.setJsonEntity("{\"name\": \"Charlie\", \"age\": null, \"title\": \"Analyst\"}"); + client().performRequest(request3); + } } @Test public void testFieldFormatStringConcatenation() throws IOException { + // Pin the projection so column order is deterministic across execution paths — the + // analytics-engine route reads parquet schema in storage order, which can differ from the + // v2 / Lucene path's _source-iteration order. Adding an explicit | fields makes the test + // a strict assertion on the fieldformat expression rather than a coincidence of projection + // order. JSONObject result = executeQuery( StringEscapeUtils.escapeJson( - "source=test_eval | fieldformat greeting = 'Hello ' + name")); + "source=test_eval | fieldformat greeting = 'Hello ' + name | fields name, title," + + " age, greeting")); verifySchema( result, schema("name", "string"),