-
Notifications
You must be signed in to change notification settings - Fork 97
Regex support for hexadecimal and unicode escapes #1341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
aafcf38
58e9f70
8aa402e
00c352b
5d6ed80
b88f1d2
931a4b0
04ff031
2305c16
bc2f5f6
ecd01c5
fd73c97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ grammar RegexJava; | |
//------ PARSER ------------------------------ | ||
// Parser rules have first letter in lower-case | ||
|
||
pattern : disjunction; | ||
pattern : disjunction EOF; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. see previous comment |
||
|
||
|
||
disjunction | ||
|
@@ -119,13 +119,13 @@ quoteChar | |
; | ||
|
||
//TODO | ||
//CharacterEscape | ||
CharacterEscape | ||
// : ControlEscape | ||
// | 'c' ControlLetter | ||
// | HexEscapeSequence | ||
// | UnicodeEscapeSequence | ||
: HexEscapeSequence | ||
| UnicodeEscapeSequence | ||
//| IdentityEscape | ||
// ; | ||
; | ||
|
||
//TODO | ||
//ControlEscape | ||
|
@@ -230,7 +230,7 @@ AtomEscape | |
: '\\' CharacterClassEscape | ||
//TODO | ||
// | '\\' DecimalEscape | ||
// | '\\' CharacterEscape | ||
| '\\' CharacterEscape | ||
; | ||
|
||
fragment CharacterClassEscape | ||
|
@@ -267,11 +267,17 @@ BaseChar | |
: ~[0-9,^$\\.*+?()[\]{}|-] | ||
; | ||
|
||
//TODO | ||
//HexEscapeSequence | ||
// : 'x' HexDigit HexDigit | ||
// ; | ||
// | ||
UnicodeEscapeSequence: | ||
'u' HexDigit HexDigit HexDigit HexDigit | ||
; | ||
|
||
HexEscapeSequence | ||
: 'x' HexDigit HexDigit | ||
; | ||
|
||
fragment HexDigit: | ||
[a-fA-F0-9] | ||
; | ||
|
||
//TODO | ||
//DecimalIntegerLiteral | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ package org.evomaster.core.parser | |
|
||
import org.evomaster.core.search.gene.regex.* | ||
|
||
private const val EOF_TOKEN = "<EOF>" | ||
/** | ||
* Parser Visitor based on the RegexEcma262.g4 grammar file | ||
*/ | ||
|
@@ -16,7 +17,8 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){ | |
|
||
val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) | ||
|
||
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text") | ||
// we remove the <EOF> token from end of the string to store as sourceRegex | ||
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0,text.length - EOF_TOKEN.length)}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. what if the |
||
|
||
return VisitResult(gene) | ||
} | ||
|
@@ -166,9 +168,22 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){ | |
return VisitResult(gene) | ||
} | ||
|
||
if(ctx.AtomEscape() != null){ | ||
val char = ctx.AtomEscape().text[1].toString() | ||
return VisitResult(CharacterClassEscapeRxGene(char)) | ||
if(ctx.AtomEscape() != null) { | ||
val txt = ctx.AtomEscape().text | ||
when { | ||
txt[1] == 'x' || txt[1] == 'u' -> { | ||
val hexValue = | ||
txt.subSequence(2, txt.length).toString().toInt(16) | ||
return VisitResult( | ||
PatternCharacterBlockGene( | ||
txt, | ||
hexValue.toChar().toString() | ||
) | ||
) | ||
} | ||
|
||
else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString())) | ||
} | ||
} | ||
|
||
if(ctx.disjunction() != null){ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ package org.evomaster.core.parser | |
|
||
import org.evomaster.core.search.gene.regex.* | ||
|
||
private const val EOF_TOKEN = "<EOF>" | ||
/** | ||
* Created by arcuri82 on 11-Sep-19. | ||
*/ | ||
|
@@ -16,7 +17,8 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){ | |
|
||
val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene }) | ||
|
||
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text") | ||
// we remove the <EOF> token from end of the string to store as sourceRegex | ||
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0, text.length - EOF_TOKEN.length)}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. see previous comment |
||
|
||
return VisitResult(gene) | ||
} | ||
|
@@ -179,8 +181,20 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){ | |
} | ||
|
||
if(ctx.AtomEscape() != null){ | ||
val char = ctx.AtomEscape().text[1].toString() | ||
return VisitResult(CharacterClassEscapeRxGene(char)) | ||
val txt = ctx.AtomEscape().text | ||
when { | ||
txt[1] == 'x' || txt[1] == 'u' -> { | ||
val hexValue = | ||
txt.subSequence(2, txt.length).toString().toInt(16) | ||
return VisitResult( | ||
PatternCharacterBlockGene( | ||
txt, | ||
hexValue.toChar().toString() | ||
) | ||
) | ||
} | ||
else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString())) | ||
} | ||
} | ||
|
||
if(ctx.disjunction() != null){ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why this `EOF`? How would it work when dealing with strings that don't have it?