Skip to content

Commit 473d02c

Browse files
authored
Merge pull request #1349 from WebFuzzing/external-pr-lmasroca
External pr lmasroca from #1341
2 parents f95d4d5 + ab428c8 commit 473d02c

File tree

6 files changed

+91
-29
lines changed

6 files changed

+91
-29
lines changed

core/src/main/antlr4/org/evomaster/core/parser/RegexEcma262.g4

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ grammar RegexEcma262;
2222
//------ PARSER ------------------------------
2323
// Parser rules have first letter in lower-case
2424

25-
pattern : disjunction;
25+
pattern : disjunction EOF;
2626

2727

2828
disjunction
@@ -96,13 +96,13 @@ atom
9696

9797

9898
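//TODO: only hex and unicode escapes are supported so far; ControlEscape, 'c' ControlLetter and IdentityEscape are still missing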
99-
//CharacterEscape
99+
CharacterEscape
100100
// : ControlEscape
101101
// | 'c' ControlLetter
102-
// | HexEscapeSequence
103-
// | UnicodeEscapeSequence
102+
: HexEscapeSequence
103+
| UnicodeEscapeSequence
104104
//| IdentityEscape
105-
// ;
105+
;
106106

107107
//TODO
108108
//ControlEscape
@@ -205,7 +205,7 @@ AtomEscape
205205
: '\\' CharacterClassEscape
206206
//TODO
207207
// | '\\' DecimalEscape
208-
// | '\\' CharacterEscape
208+
| '\\' CharacterEscape
209209
;
210210

211211
fragment CharacterClassEscape
@@ -238,11 +238,17 @@ BaseChar
238238
: ~[0-9,^$\\.*+?()[\]{}|-]
239239
;
240240

241-
//TODO
242-
//HexEscapeSequence
243-
// : 'x' HexDigit HexDigit
244-
// ;
245-
//
241+
UnicodeEscapeSequence
242+
: 'u' HexDigit HexDigit HexDigit HexDigit
243+
;
244+
245+
HexEscapeSequence
246+
: 'x' HexDigit HexDigit
247+
;
248+
249+
fragment HexDigit:
250+
[a-fA-F0-9]
251+
;
246252

247253
//TODO
248254
//DecimalIntegerLiteral

core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ grammar RegexJava;
2323
//------ PARSER ------------------------------
2424
// Parser rules have first letter in lower-case
2525

26-
pattern : disjunction;
26+
pattern : disjunction EOF;
2727

2828

2929
disjunction
@@ -119,13 +119,13 @@ quoteChar
119119
;
120120

121121
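//TODO: only hex and unicode escapes are supported so far; ControlEscape, 'c' ControlLetter and IdentityEscape are still missing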
122-
//CharacterEscape
122+
CharacterEscape
123123
// : ControlEscape
124124
// | 'c' ControlLetter
125-
// | HexEscapeSequence
126-
// | UnicodeEscapeSequence
125+
: HexEscapeSequence
126+
| UnicodeEscapeSequence
127127
//| IdentityEscape
128-
// ;
128+
;
129129

130130
//TODO
131131
//ControlEscape
@@ -230,7 +230,7 @@ AtomEscape
230230
: '\\' CharacterClassEscape
231231
//TODO
232232
// | '\\' DecimalEscape
233-
// | '\\' CharacterEscape
233+
| '\\' CharacterEscape
234234
;
235235

236236
fragment CharacterClassEscape
@@ -267,11 +267,17 @@ BaseChar
267267
: ~[0-9,^$\\.*+?()[\]{}|-]
268268
;
269269

270-
//TODO
271-
//HexEscapeSequence
272-
// : 'x' HexDigit HexDigit
273-
// ;
274-
//
270+
UnicodeEscapeSequence:
271+
'u' HexDigit HexDigit HexDigit HexDigit
272+
;
273+
274+
HexEscapeSequence
275+
: 'x' HexDigit HexDigit
276+
;
277+
278+
fragment HexDigit:
279+
[a-fA-F0-9]
280+
;
275281

276282
//TODO
277283
//DecimalIntegerLiteral

core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.evomaster.core.parser
22

33
import org.evomaster.core.search.gene.regex.*
44

5+
private const val EOF_TOKEN = "<EOF>"
56
/**
67
* Parser Visitor based on the RegexEcma262.g4 grammar file
78
*/
@@ -16,7 +17,8 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){
1617

1718
val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene })
1819

19-
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text")
20+
// The grammar's `pattern` rule ends with EOF, so the text ends with the "<EOF>" token; strip it before storing the result as sourceRegex
21+
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0,text.length - EOF_TOKEN.length)}")
2022

2123
return VisitResult(gene)
2224
}
@@ -166,9 +168,22 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){
166168
return VisitResult(gene)
167169
}
168170

169-
if(ctx.AtomEscape() != null){
170-
val char = ctx.AtomEscape().text[1].toString()
171-
return VisitResult(CharacterClassEscapeRxGene(char))
171+
if(ctx.AtomEscape() != null) {
172+
val txt = ctx.AtomEscape().text
173+
when {
174+
txt[1] == 'x' || txt[1] == 'u' -> {
175+
val hexValue =
176+
txt.subSequence(2, txt.length).toString().toInt(16)
177+
return VisitResult(
178+
PatternCharacterBlockGene(
179+
txt,
180+
hexValue.toChar().toString()
181+
)
182+
)
183+
}
184+
185+
else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString()))
186+
}
172187
}
173188

174189
if(ctx.disjunction() != null){

core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.evomaster.core.parser
22

33
import org.evomaster.core.search.gene.regex.*
44

5+
private const val EOF_TOKEN = "<EOF>"
56
/**
67
* Created by arcuri82 on 11-Sep-19.
78
*/
@@ -16,7 +17,8 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){
1617

1718
val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene })
1819

19-
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text")
20+
// The grammar's `pattern` rule ends with EOF, so the text ends with the "<EOF>" token; strip it before storing the result as sourceRegex
21+
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0, text.length - EOF_TOKEN.length)}")
2022

2123
return VisitResult(gene)
2224
}
@@ -179,8 +181,20 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){
179181
}
180182

181183
if(ctx.AtomEscape() != null){
182-
val char = ctx.AtomEscape().text[1].toString()
183-
return VisitResult(CharacterClassEscapeRxGene(char))
184+
val txt = ctx.AtomEscape().text
185+
when {
186+
txt[1] == 'x' || txt[1] == 'u' -> {
187+
val hexValue =
188+
txt.subSequence(2, txt.length).toString().toInt(16)
189+
return VisitResult(
190+
PatternCharacterBlockGene(
191+
txt,
192+
hexValue.toChar().toString()
193+
)
194+
)
195+
}
196+
else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString()))
197+
}
184198
}
185199

186200
if(ctx.disjunction() != null){

core/src/test/kotlin/org/evomaster/core/parser/GeneRegexEcma262VisitorTest.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,4 +326,14 @@ open class GeneRegexEcma262VisitorTest : RegexTestTemplate(){
326326
// p = 1 / 2^6 = 1 / 64
327327
checkCanSample("^((a|A)(b|B)(c|C)123(e|E)(f|F)(d|D))$", "aBc123EFd", 10_000)
328328
}
329+
330+
@Test
331+
fun testHexEscape(){
332+
checkSameAsJava("""\x00\x0a\xba\xFF""")
333+
}
334+
335+
@Test
336+
fun testUnicodeEscape(){
337+
checkSameAsJava("""\u0000\u0a0b\uffff""")
338+
}
329339
}

core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,15 @@ internal class RegexHandlerTest{
121121

122122
}
123123

124+
@Test
125+
fun testCreateGeneForJVMInvalidRegex() {
126+
127+
assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForJVM("\\xR") }
128+
}
129+
130+
@Test
131+
fun testCreateGeneForEcma262InvalidRegex() {
132+
133+
assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForEcma262("\\xR") }
134+
}
124135
}

0 commit comments

Comments
 (0)