@@ -32,29 +32,83 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset)
32
32
sb .append (nextChar );
33
33
return offset ;
34
34
}
35
+ if (nextChar == 'k' && offset + 1 < length && s .charAt (offset + 1 ) == '\'' ) {
36
+ // Handle \k'name' backreference (Perl syntax)
37
+ offset += 2 ; // Skip past \k'
38
+ int endQuote = s .indexOf ('\'' , offset );
39
+ if (endQuote != -1 ) {
40
+ String name = s .substring (offset , endQuote );
41
+ // Convert to Java syntax \k<name>
42
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
43
+ sb .append ("\\ k<" ).append (name ).append (">" );
44
+ return endQuote ; // Return position at closing quote
45
+ } else {
46
+ RegexPreprocessor .regexError (s , offset - 2 , "Unterminated \\ k'...' backreference" );
47
+ }
48
+ }
35
49
36
- if (nextChar == 'g' && offset + 1 < length && s .charAt (offset + 1 ) == '{' ) {
37
- // Handle \g{name} backreference
38
- offset += 2 ; // Skip past \g{
39
- int endBrace = s .indexOf ('}' , offset );
40
- if (endBrace != -1 ) {
41
- String ref = s .substring (offset , endBrace );
42
- if (ref .startsWith ("-" )) {
43
- // Handle relative backreference
44
- int relativeRef = Integer .parseInt (ref );
45
- int absoluteRef = RegexPreprocessor .captureGroupCount + relativeRef + 1 ;
46
- if (absoluteRef > 0 ) {
50
+ if (nextChar == 'g' ) {
51
+ // Handle various \g forms
52
+ if (offset + 1 >= length ) {
53
+ // Bare \g at end of string
54
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
55
+ RegexPreprocessor .regexError (s , offset , "Reference to nonexistent group" );
56
+ } else if (s .charAt (offset + 1 ) == '{' ) {
57
+ // Handle \g{name} or \g{number}
58
+ offset += 2 ; // Skip past \g{
59
+ int endBrace = s .indexOf ('}' , offset );
60
+ if (endBrace != -1 ) {
61
+ String ref = s .substring (offset , endBrace );
62
+ if (ref .startsWith ("-" )) {
63
+ // Handle relative backreference
64
+ int relativeRef = Integer .parseInt (ref );
65
+ int absoluteRef = RegexPreprocessor .captureGroupCount + relativeRef + 1 ;
66
+ if (absoluteRef > 0 ) {
67
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
68
+ sb .append ("\\ " ).append (absoluteRef );
69
+ } else {
70
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
71
+ RegexPreprocessor .regexError (s , offset - 2 , "Reference to nonexistent or unclosed group" );
72
+ }
73
+ } else if (ref .matches ("\\ d+" )) {
74
+ // Numeric reference like \g{1}
75
+ int groupNum = Integer .parseInt (ref );
76
+ if (groupNum > RegexPreprocessor .captureGroupCount ) {
77
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
78
+ RegexPreprocessor .regexError (s , offset - 2 , "Reference to nonexistent group" );
79
+ }
47
80
sb .setLength (sb .length () - 1 ); // Remove the backslash
48
- sb .append ("\\ " ).append (absoluteRef );
81
+ sb .append ("\\ " ).append (groupNum );
49
82
} else {
50
- throw new IllegalArgumentException ("Invalid relative backreference: " + ref );
83
+ // Handle named backreference
84
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
85
+ sb .append ("\\ k<" ).append (ref ).append (">" );
51
86
}
52
- } else {
53
- // Handle named backreference
87
+ offset = endBrace ;
88
+ }
89
+ } else if (Character .isDigit (s .charAt (offset + 1 ))) {
90
+ // Handle \g1, \g2, etc. (without braces)
91
+ int start = offset + 1 ;
92
+ int end = start ;
93
+ while (end < length && Character .isDigit (s .charAt (end ))) {
94
+ end ++;
95
+ }
96
+ String groupNumStr = s .substring (start , end );
97
+ int groupNum = Integer .parseInt (groupNumStr );
98
+
99
+ if (groupNum > RegexPreprocessor .captureGroupCount ) {
54
100
sb .setLength (sb .length () - 1 ); // Remove the backslash
55
- sb . append ( " \\ k<" ). append ( ref ). append ( "> " );
101
+ RegexPreprocessor . regexError ( s , offset , "Reference to nonexistent group " );
56
102
}
57
- offset = endBrace ;
103
+
104
+ // Convert \g1 to \1
105
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
106
+ sb .append ("\\ " ).append (groupNum );
107
+ return end - 1 ; // -1 because the main loop will increment
108
+ } else {
109
+ // Bare \g followed by non-digit
110
+ sb .setLength (sb .length () - 1 ); // Remove the backslash
111
+ RegexPreprocessor .regexError (s , offset , "Reference to nonexistent group" );
58
112
}
59
113
} else if ((nextChar == 'b' || nextChar == 'B' ) && offset + 1 < length && s .charAt (offset + 1 ) == '{' ) {
60
114
// Handle \b{...} and \B{...} boundary assertions
0 commit comments