Skip to content

Commit 836d6fa

Browse files
committed
fix regex error messages WIP
1 parent becfdcc commit 836d6fa

File tree

1 file changed

+33
-18
lines changed

1 file changed

+33
-18
lines changed

src/main/java/org/perlonjava/regex/RegexPreprocessorHelper.java

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,7 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset)
4747
}
4848
}
4949
if (nextChar == 'g') {
50-
// Handle various \g forms
51-
if (offset + 1 >= length) {
52-
// Bare \g at end of string
53-
sb.setLength(sb.length() - 1); // Remove the backslash
54-
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
55-
} else if (nextChar == 'g' && offset + 1 < length && s.charAt(offset + 1) == '{') {
50+
if (offset + 1 < length && s.charAt(offset + 1) == '{') {
5651
// Handle \g{name} backreference
5752
offset += 2; // Skip past \g{
5853
int endBrace = s.indexOf('}', offset);
@@ -85,29 +80,41 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset)
8580
}
8681
offset = endBrace;
8782
}
88-
} else if (Character.isDigit(s.charAt(offset + 1))) {
89-
// Handle \g1, \g2, etc. (without braces)
83+
} else if (offset + 1 < length && Character.isDigit(s.charAt(offset + 1))) {
84+
// Handle \g0, \g1, etc.
9085
int start = offset + 1;
9186
int end = start;
9287
while (end < length && Character.isDigit(s.charAt(end))) {
9388
end++;
9489
}
95-
String groupNumStr = s.substring(start, end);
96-
int groupNum = Integer.parseInt(groupNumStr);
90+
String numStr = s.substring(start, end);
91+
int groupNum = Integer.parseInt(numStr);
9792

98-
if (groupNum > RegexPreprocessor.captureGroupCount) {
99-
sb.setLength(sb.length() - 1); // Remove the backslash
100-
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
93+
if (groupNum == 0) {
94+
RegexPreprocessor.regexError(s, offset, "Reference to invalid group 0");
10195
}
10296

103-
// Convert \g1 to \1
10497
sb.setLength(sb.length() - 1); // Remove the backslash
10598
sb.append("\\").append(groupNum);
10699
return end - 1; // -1 because the main loop will increment
107-
} else {
108-
// Bare \g followed by non-digit
109-
sb.setLength(sb.length() - 1); // Remove the backslash
110-
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
100+
} else if (offset + 1 < length && s.charAt(offset + 1) == '-') {
101+
// Handle \g-1, \g-2, etc.
102+
int start = offset + 1;
103+
int end = start + 1; // Skip the minus
104+
while (end < length && Character.isDigit(s.charAt(end))) {
105+
end++;
106+
}
107+
String numStr = s.substring(start, end);
108+
int relativeRef = Integer.parseInt(numStr);
109+
int absoluteRef = RegexPreprocessor.captureGroupCount + relativeRef + 1;
110+
111+
if (absoluteRef > 0) {
112+
sb.setLength(sb.length() - 1); // Remove the backslash
113+
sb.append("\\").append(absoluteRef);
114+
} else {
115+
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
116+
}
117+
return end - 1;
111118
}
112119
} else if (nextChar == 'h') {
113120
// \h - horizontal whitespace
@@ -129,6 +136,14 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset)
129136
sb.setLength(sb.length() - 1); // Remove the backslash
130137
sb.append("[^\\n\\x0B\\f\\r\\x85\\x{2028}\\x{2029}]");
131138
return offset;
139+
} else if (nextChar == 'K') {
140+
// \K - keep assertion (reset start of match)
141+
// TODO: should be converted to a positive lookbehind for everything before this point (not implemented yet)
142+
// This is a simplified implementation
143+
sb.setLength(sb.length() - 1); // Remove the backslash
144+
// Mark position but don't add anything - this needs special handling
145+
// For now, just ignore it to avoid regex compilation errors (the \K semantics are NOT applied)
146+
return offset;
132147
} else if ((nextChar == 'b' || nextChar == 'B') && offset + 1 < length && s.charAt(offset + 1) == '{') {
133148
// Handle \b{...} and \B{...} boundary assertions
134149
boolean negated = (nextChar == 'B');

0 commit comments

Comments
 (0)