Skip to content

Commit 1a67c68

Browse files
committed
fix regex error messages WIP
1 parent 3d1a8e0 commit 1a67c68

File tree

2 files changed

+82
-20
lines changed

2 files changed

+82
-20
lines changed

src/main/java/org/perlonjava/regex/RegexPreprocessor.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,16 @@ private static int handleParentheses(String s, int offset, int length, StringBui
263263
regexError(s, offset + 1, "Sequence (? incomplete");
264264
}
265265

266-
int c2 = s.codePointAt(offset + 1);
266+
int c2 = s.codePointAt(offset + 1);
267267

268-
// Handle (?
269-
if (c2 == '?') {
268+
// Check for (*...) verb patterns FIRST, before checking (?
269+
if (c2 == '*') {
270+
// (*...) is interpreted as a verb pattern, which we don't support
271+
regexError(s, offset + 2, "Unknown verb");
272+
}
273+
274+
// Handle (?
275+
if (c2 == '?') {
270276
if (offset + 2 >= length) {
271277
// Marker should be after the ?
272278
regexError(s, offset + 2, "Sequence (? incomplete");
@@ -408,6 +414,8 @@ private static int handleCharacterClass(String s, boolean flag_xx, StringBuilder
408414
sb.append(Character.toChars(c)); // Append the '['
409415
offset++;
410416

417+
418+
411419
// Check if the bracket is properly closed
412420
int bracketEnd = offset;
413421
int depth = 1;

src/main/java/org/perlonjava/regex/RegexPreprocessorHelper.java

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,29 +32,83 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset)
3232
sb.append(nextChar);
3333
return offset;
3434
}
35+
if (nextChar == 'k' && offset + 1 < length && s.charAt(offset + 1) == '\'') {
36+
// Handle \k'name' backreference (Perl syntax)
37+
offset += 2; // Skip past \k'
38+
int endQuote = s.indexOf('\'', offset);
39+
if (endQuote != -1) {
40+
String name = s.substring(offset, endQuote);
41+
// Convert to Java syntax \k<name>
42+
sb.setLength(sb.length() - 1); // Remove the backslash
43+
sb.append("\\k<").append(name).append(">");
44+
return endQuote; // Return position at closing quote
45+
} else {
46+
RegexPreprocessor.regexError(s, offset - 2, "Unterminated \\k'...' backreference");
47+
}
48+
}
3549

36-
if (nextChar == 'g' && offset + 1 < length && s.charAt(offset + 1) == '{') {
37-
// Handle \g{name} backreference
38-
offset += 2; // Skip past \g{
39-
int endBrace = s.indexOf('}', offset);
40-
if (endBrace != -1) {
41-
String ref = s.substring(offset, endBrace);
42-
if (ref.startsWith("-")) {
43-
// Handle relative backreference
44-
int relativeRef = Integer.parseInt(ref);
45-
int absoluteRef = RegexPreprocessor.captureGroupCount + relativeRef + 1;
46-
if (absoluteRef > 0) {
50+
if (nextChar == 'g') {
51+
// Handle various \g forms
52+
if (offset + 1 >= length) {
53+
// Bare \g at end of string
54+
sb.setLength(sb.length() - 1); // Remove the backslash
55+
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
56+
} else if (s.charAt(offset + 1) == '{') {
57+
// Handle braced forms: \g{name}, \g{number}, or relative \g{-number}
58+
offset += 2; // Skip past \g{
59+
int endBrace = s.indexOf('}', offset);
60+
if (endBrace != -1) {
61+
String ref = s.substring(offset, endBrace);
62+
if (ref.startsWith("-")) {
63+
// Handle relative backreference
64+
int relativeRef = Integer.parseInt(ref);
65+
int absoluteRef = RegexPreprocessor.captureGroupCount + relativeRef + 1;
66+
if (absoluteRef > 0) {
67+
sb.setLength(sb.length() - 1); // Remove the backslash
68+
sb.append("\\").append(absoluteRef);
69+
} else {
70+
sb.setLength(sb.length() - 1); // Remove the backslash
71+
RegexPreprocessor.regexError(s, offset - 2, "Reference to nonexistent or unclosed group");
72+
}
73+
} else if (ref.matches("\\d+")) {
74+
// Numeric reference like \g{1}
75+
int groupNum = Integer.parseInt(ref);
76+
if (groupNum > RegexPreprocessor.captureGroupCount) {
77+
sb.setLength(sb.length() - 1); // Remove the backslash
78+
RegexPreprocessor.regexError(s, offset - 2, "Reference to nonexistent group");
79+
}
4780
sb.setLength(sb.length() - 1); // Remove the backslash
48-
sb.append("\\").append(absoluteRef);
81+
sb.append("\\").append(groupNum);
4982
} else {
50-
throw new IllegalArgumentException("Invalid relative backreference: " + ref);
83+
// Handle named backreference
84+
sb.setLength(sb.length() - 1); // Remove the backslash
85+
sb.append("\\k<").append(ref).append(">");
5186
}
52-
} else {
53-
// Handle named backreference
87+
offset = endBrace;
88+
}
89+
} else if (Character.isDigit(s.charAt(offset + 1))) {
90+
// Handle \g1, \g2, etc. (without braces)
91+
int start = offset + 1;
92+
int end = start;
93+
while (end < length && Character.isDigit(s.charAt(end))) {
94+
end++;
95+
}
96+
String groupNumStr = s.substring(start, end);
97+
int groupNum = Integer.parseInt(groupNumStr);
98+
99+
if (groupNum > RegexPreprocessor.captureGroupCount) {
54100
sb.setLength(sb.length() - 1); // Remove the backslash
55-
sb.append("\\k<").append(ref).append(">");
101+
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
56102
}
57-
offset = endBrace;
103+
104+
// Convert \g1 to \1
105+
sb.setLength(sb.length() - 1); // Remove the backslash
106+
sb.append("\\").append(groupNum);
107+
return end - 1; // -1 because the main loop will increment
108+
} else {
109+
// Bare \g followed by something that is neither '{' nor a digit
110+
sb.setLength(sb.length() - 1); // Remove the backslash
111+
RegexPreprocessor.regexError(s, offset, "Reference to nonexistent group");
58112
}
59113
} else if ((nextChar == 'b' || nextChar == 'B') && offset + 1 < length && s.charAt(offset + 1) == '{') {
60114
// Handle \b{...} and \B{...} boundary assertions

0 commit comments

Comments
 (0)