Skip to content

Commit 5cd0ac1

Browse files
committed
offset field - fix offset calculation to honor max_match
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent dce13b4 commit 5cd0ac1

3 files changed

Lines changed: 70 additions & 36 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,14 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
225225
}
226226

227227
if (node.getOffsetField().isPresent()) {
228+
int maxMatchValue = node.getMaxMatch().orElse(1);
228229
RexNode offsetCall =
229230
PPLFuncImpTable.INSTANCE.resolve(
230231
context.rexBuilder,
231232
BuiltinFunctionName.REX_OFFSET,
232233
fieldRex,
233-
context.rexBuilder.makeLiteral(patternStr));
234+
context.rexBuilder.makeLiteral(patternStr),
235+
context.relBuilder.literal(maxMatchValue));
234236
newFields.add(offsetCall);
235237
newFieldNames.add(node.getOffsetField().get());
236238
}

core/src/main/java/org/opensearch/sql/expression/function/udf/RexOffsetFunction.java

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public SqlReturnTypeInference getReturnTypeInference() {
3434

3535
@Override
3636
public UDFOperandMetadata getOperandMetadata() {
37-
return PPLOperandTypes.STRING_STRING;
37+
return PPLOperandTypes.STRING_STRING_INTEGER;
3838
}
3939

4040
private static class RexOffsetImplementor implements NotNullImplementor {
@@ -44,12 +44,13 @@ public Expression implement(
4444
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
4545
Expression field = translatedOperands.get(0);
4646
Expression pattern = translatedOperands.get(1);
47+
Expression maxMatch = translatedOperands.get(2);
4748

48-
return Expressions.call(RexOffsetFunction.class, "calculateOffsets", field, pattern);
49+
return Expressions.call(RexOffsetFunction.class, "calculateOffsets", field, pattern, maxMatch);
4950
}
5051
}
5152

52-
public static String calculateOffsets(String text, String patternStr) {
53+
public static String calculateOffsets(String text, String patternStr, int maxMatch) {
5354
if (text == null || patternStr == null) {
5455
return null;
5556
}
@@ -58,33 +59,37 @@ public static String calculateOffsets(String text, String patternStr) {
5859
Pattern pattern = Pattern.compile(patternStr);
5960
Matcher matcher = pattern.matcher(text);
6061

61-
if (!matcher.find()) {
62-
return null;
63-
}
64-
65-
List<String> offsetPairs = new java.util.ArrayList<>();
62+
List<String> allOffsetPairs = new java.util.ArrayList<>();
6663

6764
Pattern namedGroupPattern = Pattern.compile("\\(\\?<([^>]+)>");
6865
Matcher namedGroupMatcher = namedGroupPattern.matcher(patternStr);
69-
70-
int groupIndex = 1;
71-
66+
List<String> groupNames = new java.util.ArrayList<>();
7267
while (namedGroupMatcher.find()) {
73-
String groupName = namedGroupMatcher.group(1);
68+
groupNames.add(namedGroupMatcher.group(1));
69+
}
70+
71+
if (groupNames.isEmpty()) {
72+
return null;
73+
}
7474

75-
if (groupIndex <= matcher.groupCount()) {
75+
// Collect offsets for at most maxMatch matches; a non-positive maxMatch means no limit
76+
int matchCount = 0;
77+
int maxMatchLimit = (maxMatch > 0) ? maxMatch : Integer.MAX_VALUE;
78+
79+
while (matcher.find() && matchCount < maxMatchLimit) {
80+
for (int groupIndex = 1; groupIndex <= matcher.groupCount() && groupIndex <= groupNames.size(); groupIndex++) {
81+
String groupName = groupNames.get(groupIndex - 1);
7682
int start = matcher.start(groupIndex);
7783
int end = matcher.end(groupIndex);
7884

7985
if (start >= 0 && end >= 0) {
80-
offsetPairs.add(groupName + "=" + start + "-" + (end - 1));
86+
allOffsetPairs.add(groupName + "=" + start + "-" + (end - 1));
8187
}
8288
}
83-
groupIndex++;
89+
matchCount++;
8490
}
8591

86-
java.util.Collections.reverse(offsetPairs);
87-
return offsetPairs.isEmpty() ? null : String.join("&", offsetPairs);
92+
return allOffsetPairs.isEmpty() ? null : String.join("&", allOffsetPairs);
8893
} catch (Exception e) {
8994
return null;
9095
}

core/src/test/java/org/opensearch/sql/expression/function/udf/RexOffsetFunctionTest.java

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,17 @@ public void testCalculateOffsets_SimpleExample() {
2020
String text = "user@domain.com";
2121
String pattern = "(?<username>\\w+)@(?<domain>\\w+)\\.(?<tld>\\w+)";
2222

23-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
24-
// user=0-3, domain=5-10, com=12-14 (but order is reversed)
25-
assertEquals("tld=12-14&domain=5-10&username=0-3", result);
23+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
24+
// Groups appear in pattern order: username, domain, tld
25+
assertEquals("username=0-3&domain=5-10&tld=12-14", result);
2626
}
2727

2828
@Test
2929
public void testCalculateOffsets_SingleNamedGroup() {
3030
String text = "hello world";
3131
String pattern = "(?<word>\\w+)";
3232

33-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
33+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
3434
assertEquals("word=0-4", result);
3535
}
3636

@@ -39,31 +39,31 @@ public void testCalculateOffsets_TwoGroups() {
3939
String text = "abc123";
4040
String pattern = "(?<letters>[a-z]+)(?<numbers>\\d+)";
4141

42-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
43-
assertEquals("numbers=3-5&letters=0-2", result);
42+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
43+
assertEquals("letters=0-2&numbers=3-5", result);
4444
}
4545

4646
@Test
4747
public void testCalculateOffsets_NoMatches() {
4848
String text = "This text has no digits";
4949
String pattern = "(?<digit>\\d+)";
5050

51-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
51+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
5252
assertNull(result);
5353
}
5454

5555
@Test
5656
public void testCalculateOffsets_NullInputs() {
5757
// Null text
58-
String result = RexOffsetFunction.calculateOffsets(null, "(?<test>\\w+)");
58+
String result = RexOffsetFunction.calculateOffsets(null, "(?<test>\\w+)", 1);
5959
assertNull(result);
6060

6161
// Null pattern
62-
result = RexOffsetFunction.calculateOffsets("test text", null);
62+
result = RexOffsetFunction.calculateOffsets("test text", null, 1);
6363
assertNull(result);
6464

6565
// Both null
66-
result = RexOffsetFunction.calculateOffsets(null, null);
66+
result = RexOffsetFunction.calculateOffsets(null, null, 1);
6767
assertNull(result);
6868
}
6969

@@ -72,7 +72,7 @@ public void testCalculateOffsets_InvalidPattern() {
7272
String text = "test string";
7373
String invalidPattern = "[unclosed";
7474

75-
String result = RexOffsetFunction.calculateOffsets(text, invalidPattern);
75+
String result = RexOffsetFunction.calculateOffsets(text, invalidPattern, 1);
7676
assertNull(result);
7777
}
7878

@@ -81,7 +81,7 @@ public void testCalculateOffsets_EmptyString() {
8181
String text = "";
8282
String pattern = "(?<word>\\w+)";
8383

84-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
84+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
8585
assertNull(result);
8686
}
8787

@@ -90,7 +90,7 @@ public void testCalculateOffsets_PatternWithoutNamedGroups() {
9090
String text = "test123";
9191
String pattern = "(\\w+)(\\d+)";
9292

93-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
93+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
9494
assertNull(result);
9595
}
9696

@@ -99,7 +99,7 @@ public void testCalculateOffsets_SingleCharacterMatch() {
9999
String text = "a";
100100
String pattern = "(?<char>[a-z])";
101101

102-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
102+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
103103
assertEquals("char=0-0", result);
104104
}
105105

@@ -108,17 +108,17 @@ public void testCalculateOffsets_DigitsPattern() {
108108
String text = "year 2023 month 12";
109109
String pattern = "(?<year>\\d{4}).*(?<month>\\d{2})";
110110

111-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
112-
assertEquals("month=16-17&year=5-8", result);
111+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
112+
assertEquals("year=5-8&month=16-17", result);
113113
}
114114

115115
@Test
116116
public void testCalculateOffsets_EmailExample() {
117117
String text = "email: john@example.org";
118118
String pattern = "(?<name>\\w+)@(?<domain>\\w+)\\.(?<ext>\\w+)";
119119

120-
String result = RexOffsetFunction.calculateOffsets(text, pattern);
121-
assertEquals("ext=20-22&domain=12-18&name=7-10", result);
120+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 1);
121+
assertEquals("name=7-10&domain=12-18&ext=20-22", result);
122122
}
123123

124124
@Test
@@ -136,4 +136,31 @@ public void testFunctionConstructor() {
136136
RexOffsetFunction testFunction = new RexOffsetFunction();
137137
assertNotNull(testFunction, "Function should be properly initialized");
138138
}
139+
140+
@Test
141+
public void testCalculateOffsets_MaxMatchMultiple() {
142+
String text = "880 Holmes Lane";
143+
String pattern = "(?<digit>\\d)";
144+
145+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 2);
146+
assertEquals("digit=0-0&digit=1-1", result);
147+
}
148+
149+
@Test
150+
public void testCalculateOffsets_MaxMatchUnlimited() {
151+
String text = "880 Holmes Lane";
152+
String pattern = "(?<digit>\\d)";
153+
154+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 0);
155+
assertEquals("digit=0-0&digit=1-1&digit=2-2", result);
156+
}
157+
158+
@Test
159+
public void testCalculateOffsets_MaxMatchExceedsAvailable() {
160+
String text = "880";
161+
String pattern = "(?<digit>\\d)";
162+
163+
String result = RexOffsetFunction.calculateOffsets(text, pattern, 10);
164+
assertEquals("digit=0-0&digit=1-1&digit=2-2", result);
165+
}
139166
}

0 commit comments

Comments
 (0)