From 9ff7f1827cefafdbf55604a1f6adaa4a20ed35d0 Mon Sep 17 00:00:00 2001 From: Michael Wolfendale Date: Sun, 11 Nov 2018 09:10:11 +0000 Subject: [PATCH] Refactored pretty much everything --- .../scalacheck/regexp/ASTProcessor.scala | 220 +++++++-------- .../scalacheck/regexp/GenParser.scala | 264 +++++++++++------- .../scalacheck/regexp/ast/AST.scala | 75 +++-- .../scalacheck/regexp/ASTProcessorSpec.scala | 120 +++----- .../scalacheck/regexp/GenParserSpec.scala | 213 +++++++------- 5 files changed, 422 insertions(+), 470 deletions(-) diff --git a/src/main/scala/wolfendale/scalacheck/regexp/ASTProcessor.scala b/src/main/scala/wolfendale/scalacheck/regexp/ASTProcessor.scala index 9106cdc..82b11ae 100644 --- a/src/main/scala/wolfendale/scalacheck/regexp/ASTProcessor.scala +++ b/src/main/scala/wolfendale/scalacheck/regexp/ASTProcessor.scala @@ -2,148 +2,116 @@ package wolfendale.scalacheck.regexp import org.scalacheck.{Arbitrary, Gen} import ast._ +import wolfendale.scalacheck.regexp.data.Group.{Exclusion, Inclusion} object ASTProcessor { - // TODO negation which doesn't use `suchThat`? - private def negated(re: Negated)(implicit ev: Arbitrary[Char]): Gen[String] = { - - val arbitraryString: Gen[String] = - Arbitrary.arbitrary[Char].map(_.toString) - - def termToString(term: CharacterClass.Term): String = { - term match { - case CharacterClass.Literal(str) => - str - case CharacterClass.CharRange(min, max) => - s"$min-$max" - case CharacterClass.DigitRange(min, max) => - s"$min-$max" - case CharacterClass.DigitChar => - "\\d" - case CharacterClass.WordChar => - "\\w" - case CharacterClass.SpaceChar => - "\\s" - case _ => - "" - } - } - - re match { - case Negated(WordChar) => - arbitraryString.suchThat(_.matches("\\W")) - case Negated(DigitChar) => - arbitraryString.suchThat(_.matches("\\D")) - case Negated(SpaceChar) => - arbitraryString.suchThat(_.matches("\\S")) - case Negated(WordBoundary) => + def apply(expression: RegularExpression)(implicit ev: Arbitrary[Char]): Gen[String] = { + expression match { + case Literal(value) => + Gen.const(value.toString) + case Group(term, rest, _) => + for { + termGen <- apply(term) + restGen <- rest.map(apply) + .getOrElse(Gen.const("")) + } yield termGen + restGen + case Or(left, right) => + Gen.oneOf(apply(left), apply(right)) + case And(left, right) => + for { + leftGen <- apply(left) + rightGen <- apply(right) + } yield leftGen + rightGen + case _: Meta => Gen.const("") - case Negated(CharacterClass(terms @ _*)) => - arbitraryString.suchThat(_.matches(s"[^${terms.map(termToString).mkString("")}]")) - // TODO fix AST so that this isn't a valid construction - case _ => - sys.error("invalid negated term") + case expression: Quantified => + quantified(expression) + case expression: CharacterClass.Group.Term => + characterClass(expression) + .map(_.toString) + case expression => + sys.error(s"Unsupported syntax! $expression") } } - // TODO tailrec optimisation - def apply(re: RegularExpression)(implicit ev: Arbitrary[Char]): Gen[String] = { - - re match { - case Literal(str) => - literal(str) - case WordChar => - wordChar - case SpaceChar => - spaceChar - case DigitChar => - digitChar - case AnyChar => - Arbitrary.arbitrary[Char].map(_.toString) - case Group(inner) => - apply(inner) - case NonCapturingGroup(inner) => - apply(inner) - case Or(left, right) => - Gen.oneOf(apply(left), apply(right)) - case And(left, right) => + private def quantified(expression: Quantified)(implicit ev: Arbitrary[Char]): Gen[String] = { + expression match { + case Optional(term) => + Gen.option(apply(term)) + .map(_.getOrElse("")) + case ZeroOrMore(term) => + Gen.listOf(apply(term)) + .map(_.mkString) + case OneOrMore(term) => for { - l <- apply(left) - r <- apply(right) - } yield l + r - case Optional(inner) => - optional(apply(inner)) - case OneOrMore(inner) => - Gen.nonEmptyListOf(apply(inner)).map(_.mkString("")) - case ZeroOrMore(inner) => - Gen.listOf(apply(inner)).map(_.mkString("")) - case RangeFrom(inner, min) => - // configurable defaults + num <- Gen.chooseNum(1, 100) + list <- Gen.listOfN(num, apply(term)) + } yield list.mkString + case Length(term, length) => + Gen.listOfN(length, apply(term)) + .map(_.mkString) + case RangeFrom(term, min) => for { - length <- Gen.choose(min, 100) - list <- Gen.listOfN(length, apply(inner)) - } yield list.mkString("") - case Range(inner, min, max) => + num <- Gen.chooseNum(min, 100) + list <- Gen.listOfN(num, apply(term)) + } yield list.mkString + case Range(term, min, max) => for { - length <- Gen.choose(min, max) - list <- Gen.listOfN(length, apply(inner)) - } yield list.mkString("") - case Length(inner, length) => - Gen.listOfN(length, apply(inner)).map(_.mkString("")) - case CharacterClass(terms@_*) => - processClass(terms) - case term: Negated => - negated(term) - case term: Substitution => - sys.error("backreferences are not supported") - case WordBoundary | BOS | EOS => - Gen.const("") + num <- Gen.chooseNum(min, max) + list <- Gen.listOfN(num, apply(term)) + } yield list.mkString } } - private def processClass(terms: Seq[CharacterClass.Term]): Gen[String] = { - - val gens = terms.toList.map { - case CharacterClass.Literal(str) => - literal(str) - case CharacterClass.DigitRange(min, max) => - Gen.choose(min, max).map(_.toString) - case CharacterClass.CharRange(min, max) => - Gen.choose(min, max).map(_.toString) - case CharacterClass.WordChar => - wordChar - case CharacterClass.SpaceChar => - spaceChar - case CharacterClass.DigitChar => - digitChar - case _ => - Gen.const("") + private def characterClass(expression: CharacterClass.Group.Term)(implicit ev: Arbitrary[Char]): Gen[Char] = { + + import CharacterClass._ + + val digits: data.Group[Char] = Inclusion((48.toChar to 57.toChar).toSet) + val spaces: data.Group[Char] = Inclusion(Set(' ', '\t', '\r', '\n')) + val alphaUpper: data.Group[Char] = Inclusion((65.toChar to 90.toChar).toSet) + val alphaLower: data.Group[Char] = Inclusion((97.toChar to 122.toChar).toSet) + val alpha: data.Group[Char] = alphaUpper ++ alphaLower + val alphaNum: data.Group[Char] = alpha ++ digits + val word: data.Group[Char] = alphaNum ++ Inclusion(Set('_')) //, '-')) + val any: data.Group[Char] = Exclusion(Set.empty) + + def toGroup(expression: CharacterClass.Group.Term): data.Group[Char] = { + expression match { + case CharacterClass.Literal(value) => + Inclusion(Set(value)) + case CharacterClass.Group(values @ _*) => + values.map(toGroup).foldLeft[data.Group[Char]](Inclusion(Set.empty)) { + _ ++ _ + } + case CharacterClass.Range(min, max) => + Inclusion((min to max).toSet) + case Word => + word + case Digit => + digits + case Space => + spaces + case Any => + any + case Negated(characterClass) => + toGroup(characterClass).compliment + case expression => + sys.error(s"Unsupported syntax! $expression") + } } - gens match { - case a :: Nil => a - case a :: b :: xs => - Gen.oneOf(a, b, xs: _*) - case _ => - Gen.const("") + def toGen(group: data.Group[Char]): Gen[Char] = { + group match { + case Inclusion(values) => + Gen.oneOf(values.toSeq) + case Exclusion(values) => + Arbitrary.arbitrary[Char] + .suchThat(!values.contains(_)) + } } - } - - private val wordChar: Gen[String] = - Gen.oneOf(Gen.alphaNumChar, Gen.const('_')).map(_.toString) - private val spaceChar: Gen[String] = { - // should this contain other characters? - Gen.oneOf(" ", "\t") + toGen(toGroup(expression)) } - - private val digitChar: Gen[String] = - Gen.numChar.map(_.toString) - - private def literal(str: String): Gen[String] = - Gen.const(str) - - private def optional(inner: Gen[String]): Gen[String] = - Gen.frequency(1 -> inner, 1 -> Gen.const("")) } diff --git a/src/main/scala/wolfendale/scalacheck/regexp/GenParser.scala b/src/main/scala/wolfendale/scalacheck/regexp/GenParser.scala index 4b1f831..4acce1e 100644 --- a/src/main/scala/wolfendale/scalacheck/regexp/GenParser.scala +++ b/src/main/scala/wolfendale/scalacheck/regexp/GenParser.scala @@ -8,151 +8,203 @@ object GenParser extends RegexParsers with PackratParsers { override def skipWhitespace: Boolean = false - lazy val expression0: PackratParser[RegularExpression] = group | characterClass | term + private lazy val int: Parser[Int] = { + "\\d+".r ^^ { _.toInt } + } - lazy val expression1: PackratParser[RegularExpression] = { + private lazy val any: Parser[String] = ".".r - val int: Parser[Int] = "\\d+".r ^^ { _.toInt } + private lazy val literalTerm: Parser[Literal] = { - // annoyingly hacky :( and it potentially breaks associativity but it's - // nicer than having a concat rule for our purposes - def splitLiteral(re: RegularExpression, result: RegularExpression => RegularExpression): RegularExpression = { - re match { - case Literal(s) if s.length > 1 => - And(Literal(s.init), result(Literal(s.last.toString))) - case other => - result(other) - } + val meta: Parser[String] = + "\\w" | "\\d" | "\\s" | "\\W" | "\\D" | "\\S" | + ")" | "(" | "$" | "[" | "." | "+" | "*" | "?" | "|" | "\\" | "{" + + val escaped: Parser[String] = + "\\" ~> meta + + val allowed: Parser[String] = + not(meta) ~> any + + (escaped | allowed) ^^ { + str => + Literal(str.last) } + } + + private lazy val characterClass: Parser[CharacterClass with Term] = { + + lazy val insideCharacterClass: Parser[CharacterClass.Group.Term] = { + + val allowed: Parser[String] = + not("]" | "\\") ~> any + + val literalTerm: Parser[CharacterClass.Literal] = { + + val meta: Parser[String] = + "\\w" | "\\d" | "\\s" | "\\W" | "\\D" | "\\S" - // quantifiers - val optional = expression0 <~ "?" ^^ { splitLiteral(_, Optional) } - val oneOrMore = expression0 <~ "+" ^^ { splitLiteral(_, OneOrMore) } - val zeroOrMore = expression0 <~ "*" ^^ { splitLiteral(_, ZeroOrMore) } + val escaped: Parser[String] = + "\\" ~> (meta | any) - val rangeFrom = expression0 ~ ("{" ~> int <~ ",}") ^^ { - case expr ~ min => - splitLiteral(expr, RangeFrom(_, min)) + (escaped | allowed) ^^ { + str => + CharacterClass.Literal(str.last) + } + } + + val range: Parser[CharacterClass.Range] = { + + any ~ ("-" ~> allowed) ^^ { + case min ~ max => + CharacterClass.Range(min.last, max.last) + } + } + + lazy val intersection: PackratParser[CharacterClass.Intersection] = { + + val separator: Parser[String] = "&&" + + insideCharacterClass ~ (separator ~> insideCharacterClass) ^^ { + case one ~ two => + CharacterClass.Intersection(one, two) + } + } + + intersection | characterClass | range | literalTerm } - val range = expression0 ~ ("{" ~> int ~ ("," ~> int <~ "}")) ^^ { - case expr ~ (min ~ max) => - splitLiteral(expr, Range(_, min, max)) + lazy val inclusive: Parser[CharacterClass with Term] = { + + lazy val group: Parser[CharacterClass.Group] = { + + '[' ~> insideCharacterClass.* <~ ']' ^^ { + classes => + CharacterClass.Group(classes: _*) + } + } + + val word: Parser[CharacterClass.Word.type] = + "\\w" ^^^ { CharacterClass.Word } + val digit: Parser[CharacterClass.Digit.type] = + "\\d" ^^^ { CharacterClass.Digit } + val space: Parser[CharacterClass.Space.type] = + "\\s" ^^^ { CharacterClass.Space } + val any: Parser[CharacterClass.Any.type] = + '.' ^^^ { CharacterClass.Any } + + group | word | digit | space | any } - val length = expression0 ~ ("{" ~> int <~ "}") ^^ { - case expr ~ l => - splitLiteral(expr, Length(_, l)) + lazy val exclusive: Parser[CharacterClass.Negated] = { + + lazy val negatedGroup = + "[^" ~> insideCharacterClass.* <~ ']' ^^ { + classes => + CharacterClass.Negated(CharacterClass.Group(classes: _*)) + } + + val negatedWord = + "\\W" ^^^ { CharacterClass.Negated(CharacterClass.Word) } + val negatedDigit = + "\\D" ^^^ { CharacterClass.Negated(CharacterClass.Digit) } + val negatedSpace = + "\\S" ^^^ { CharacterClass.Negated(CharacterClass.Space) } + + negatedGroup | negatedWord | negatedSpace | negatedDigit } - optional | oneOrMore | zeroOrMore | rangeFrom | range | length | expression0 + exclusive | inclusive } - // and - lazy val expression2: PackratParser[RegularExpression] = - (expression2 ~ expression1) ^^ { case a ~ b => And(a, b) } | expression1 + private lazy val meta: Parser[Meta] = { - // or - lazy val expression: PackratParser[RegularExpression] = - (expression ~ ("|" ~> expression2) ^^ { case a ~ b => Or(a, b) }) | expression2 + val start: Parser[Start.type] = + '^' ^^^ { Start } - lazy val group: PackratParser[RegularExpression] = { + val end: Parser[End.type] = + '$' ^^^ { End } - lazy val nonCapturingGroup = "(?:" ~> expression <~ ")" ^^ NonCapturingGroup - lazy val capturingGroup = "(" ~> expression <~ ")" ^^ Group + val wordBoundary: Parser[WordBoundary.type] = + "\\b" ^^^ { WordBoundary } - capturingGroup | nonCapturingGroup + val negatedWordBoundary: Parser[NegatedWordBoundary.type] = + "\\B" ^^^ { NegatedWordBoundary } + + start | end | wordBoundary | negatedWordBoundary } + private lazy val substitution: Parser[Substitution] = + '\\' ~> int ^^ { Substitution.apply } + + private lazy val quantified: PackratParser[Quantified] = { + + val optional: Parser[Optional] = + (term <~ '?') ^^ { Optional.apply } - // character classes - lazy val characterClass: PackratParser[RegularExpression] = { + val zeroOrMore: Parser[ZeroOrMore] = + (term <~ '*') ^^ { ZeroOrMore.apply } - lazy val digitRange: Parser[CharacterClass.Term] = { - val d: Parser[Int] = "\\d".r ^^ { _.toInt } - (d ~ ("-" ~> d)) ^^ { - case min ~ max => - CharacterClass.DigitRange(min, max) + val oneOrMore: Parser[OneOrMore] = + (term <~ '+') ^^ { OneOrMore.apply } + + val length: Parser[Length] = + (term ~ ('{' ~> int <~ '}')) ^^ { + case t ~ l => + Length(t, l) } - } - lazy val lowerAlphaRange: Parser[CharacterClass.Term] = { - val c = "[a-z]".r ^^ { _.apply(0) } - c ~ ("-" ~> c) ^^ { - case min ~ max => - CharacterClass.CharRange(min, max) + val rangeFrom: Parser[RangeFrom] = + (term ~ ('{' ~> int <~ ",}")) ^^ { + case t ~ min => + RangeFrom(t, min) } - } - lazy val upperAlphaRange: Parser[CharacterClass.Term] = { - val c = "[A-Z]".r ^^ { _.apply(0) } - c ~ ("-" ~> c) ^^ { - case min ~ max => - CharacterClass.CharRange(min, max) + val range: Parser[Range] = + (term ~ ('{' ~> int ~ (',' ~> int <~ '}'))) ^^ { + case t ~ (min ~ max) => + Range(t, min, max) } - } - val word: Parser[CharacterClass.Term] = "\\w" ^^^ CharacterClass.WordChar - val digit: Parser[CharacterClass.Term] = "\\d" ^^^ CharacterClass.DigitChar - val space: Parser[CharacterClass.Term] = "\\s" ^^^ CharacterClass.SpaceChar - val wordBoundary: Parser[CharacterClass.Term] = "\\b" ^^^ CharacterClass.WordBoundary + optional | zeroOrMore | oneOrMore | length | rangeFrom | range + } - lazy val char: Parser[CharacterClass.Term] = { - val normalChars = "[^\\]\\\\]".r - val meta = "\\" | "]" | "-" - (("\\" ~> meta) | normalChars | "\\" ~> normalChars) ^^ CharacterClass.Literal - } + private lazy val group: Parser[Group] = { - lazy val characterClassTerm: Parser[CharacterClass.Term] = - word | digit | space | wordBoundary | digitRange | lowerAlphaRange | upperAlphaRange | char + lazy val nonCapturingGroup: Parser[Group] = + ("(?:" ~> regularExpression <~ ')') ~ regularExpression.? ^^ { + case inner ~ rest => + Group(inner, rest, capturing = false) + } - lazy val charClass = - ("[" ~> characterClassTerm.+ <~ "]") ^^ { CharacterClass(_: _*) } - lazy val negatedCharClass = - ("[^" ~> characterClassTerm.+ <~ "]") ^^ { terms => Negated(CharacterClass(terms: _*)) } + lazy val capturingGroup: Parser[Group] = + ('(' ~> regularExpression <~ ')') ~ regularExpression.? ^^ { + case inner ~ rest => + Group(inner, rest) + } - negatedCharClass | charClass + nonCapturingGroup | capturingGroup } - // terminals... - lazy val term: PackratParser[RegularExpression] = char | classes | negClasses | substitution - - // default classes - lazy val word: Parser[RegularExpression] = "\\w" ^^^ WordChar - lazy val digit: Parser[RegularExpression] = "\\d" ^^^ DigitChar - lazy val space: Parser[RegularExpression] = "\\s" ^^^ SpaceChar - lazy val any: Parser[RegularExpression] = "." ^^^ AnyChar - lazy val wordBoundary: Parser[RegularExpression] = "\\b" ^^^ WordBoundary - lazy val classes: Parser[RegularExpression] = word | digit | space | any | wordBoundary - - lazy val negWord: Parser[RegularExpression] = "\\W" ^^^ Negated(WordChar) - lazy val negDigit: Parser[RegularExpression] = "\\D" ^^^ Negated(DigitChar) - lazy val negSpace: Parser[RegularExpression] = "\\S" ^^^ Negated(SpaceChar) - lazy val negBoundary: Parser[RegularExpression] = "\\B" ^^^ Negated(WordBoundary) - lazy val negClasses: Parser[RegularExpression] = negWord | negDigit | negSpace | negBoundary - - lazy val substitution: Parser[RegularExpression] = "\\" ~> "[1-9]\\d*".r ^^ { - index => - Substitution(index.toInt) + private lazy val term: PackratParser[Term] = { + quantified | group | characterClass | meta | substitution | literalTerm } - lazy val bos: Parser[RegularExpression] = "^" ^^^ BOS - lazy val eos: Parser[RegularExpression] = "$" ^^^ EOS + private lazy val regularExpression: PackratParser[RegularExpression] = { - lazy val regularExpression: Parser[RegularExpression] = { - bos ~> expression <~ eos ^^ { expr => And(And(BOS, expr), EOS) } | - bos ~> expression ^^ { expr => And(BOS, expr) } | - expression <~ eos ^^ { expr => And(expr, EOS) } | - expression - } + lazy val or: Parser[Or] = + (regularExpression ~ ('|' ~> term)) ^^ { + case left ~ right => Or(left, right) + } - // literals - lazy val char: PackratParser[Literal] = { - val meta: Parser[String] = ")" | "(" | "$" | "[" | "." | "+" | "*" | "?" | "|" | "\\" | "{" - (("\\" ~> meta) | "[^|)(.+*?{\\[$\\\\]".r).+ ^^ { strs => Literal(strs.mkString("")) } + lazy val and: Parser[And] = + (regularExpression ~ term) ^^ { + case left ~ right => And(left, right) + } + + and | or | term } - def parse(string: String): RegularExpression = { + def parse(string: String): RegularExpression = regularExpression(new PackratReader(new CharSequenceReader(string))).get - } } diff --git a/src/main/scala/wolfendale/scalacheck/regexp/ast/AST.scala b/src/main/scala/wolfendale/scalacheck/regexp/ast/AST.scala index cb4a9de..3cfb6f6 100644 --- a/src/main/scala/wolfendale/scalacheck/regexp/ast/AST.scala +++ b/src/main/scala/wolfendale/scalacheck/regexp/ast/AST.scala @@ -1,56 +1,49 @@ package wolfendale.scalacheck.regexp.ast -sealed trait RegularExpression { +sealed trait RegularExpression - def |(that: RegularExpression): RegularExpression = - Or(this, that) +sealed trait Term extends RegularExpression - def &(that: RegularExpression): RegularExpression = - And(this, that) -} - -case class Literal(value: String) extends RegularExpression - -case object WordChar extends RegularExpression -case object DigitChar extends RegularExpression -case object SpaceChar extends RegularExpression -case object AnyChar extends RegularExpression +final case class Literal(value: Char) extends Term -case object BOS extends RegularExpression -case object EOS extends RegularExpression -case object WordBoundary extends RegularExpression +sealed trait Meta extends Term +case object Start extends Meta +case object End extends Meta +case object WordBoundary extends Meta +case object NegatedWordBoundary extends Meta -case class Group(term: RegularExpression) extends RegularExpression -case class Substitution(index: Int) extends RegularExpression -case class NonCapturingGroup(term: RegularExpression) extends RegularExpression +final case class Group(term: RegularExpression, rest: Option[RegularExpression] = None, capturing: Boolean = true) extends Term +final case class Substitution(index: Int) extends Term +final case class Or(left: RegularExpression, right: Term) extends Term +final case class And(left: RegularExpression, right: Term) extends Term -case class Or(t1: RegularExpression, t2: RegularExpression) extends RegularExpression -case class And(t1: RegularExpression, t2: RegularExpression) extends RegularExpression - -case class Negated(term: RegularExpression) extends RegularExpression - -sealed trait Quantified extends RegularExpression -case class Optional(term: RegularExpression) extends Quantified -case class ZeroOrMore(term: RegularExpression) extends Quantified -case class OneOrMore(term: RegularExpression) extends Quantified -case class Length(term: RegularExpression, min: Int) extends Quantified -case class RangeFrom(term: RegularExpression, min: Int) extends Quantified -case class Range(term: RegularExpression, min: Int, max: Int) extends Quantified +sealed trait Quantified extends Term +case class Optional(term: Term) extends Quantified +case class ZeroOrMore(term: Term) extends Quantified +case class OneOrMore(term: Term) extends Quantified +case class Length(term: Term, length: Int) extends Quantified +case class RangeFrom(term: Term, min: Int) extends Quantified +case class Range(term: Term, min: Int, max: Int) extends Quantified object CharacterClass { - sealed trait Term + final case class Literal(value: Char) extends Group.Term + case object Word extends CharacterClass with Term + case object Digit extends CharacterClass with Term + case object Space extends CharacterClass with Term + case object Any extends CharacterClass with Term + final case class Negated(characterClass: CharacterClass) extends CharacterClass with Term + final case class Intersection(left: Group.Term, right: Group.Term) extends CharacterClass - case class Literal(value: String) extends Term + final case class Group(terms: Group.Term*) extends CharacterClass with Term - case class DigitRange(min: Int, max: Int) extends Term - case class CharRange(min: Char, max: Char) extends Term + object Group { + sealed trait Term extends RegularExpression + } - case object WordChar extends Term - case object DigitChar extends Term - case object SpaceChar extends Term - case object WordBoundary extends Term + final case class Range(min: Char, max: Char) extends CharacterClass { + require(min <= max) + } } -case class CharacterClass(terms: CharacterClass.Term*) extends RegularExpression - +sealed trait CharacterClass extends CharacterClass.Group.Term diff --git a/src/test/scala/wolfendale/scalacheck/regexp/ASTProcessorSpec.scala b/src/test/scala/wolfendale/scalacheck/regexp/ASTProcessorSpec.scala index 7ef9666..8d80eb0 100644 --- a/src/test/scala/wolfendale/scalacheck/regexp/ASTProcessorSpec.scala +++ b/src/test/scala/wolfendale/scalacheck/regexp/ASTProcessorSpec.scala @@ -1,19 +1,19 @@ package wolfendale.scalacheck.regexp -import org.scalacheck.Gen +import org.scalacheck.{Gen, Shrink} import org.scalatest.prop.PropertyChecks import org.scalatest.{MustMatchers, WordSpec} import ast._ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { + implicit def noShrink[A]: Shrink[A] = Shrink.shrinkAny + ".apply" must { "generate a literal char" in { - val strGen: Gen[String] = Gen.alphaNumChar.map(_.toString) - - forAll(strGen) { + forAll(Gen.alphaNumChar) { literal => val gen: Gen[String] = @@ -21,7 +21,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { forAll(gen) { str => - str must fullyMatch regex literal + str must fullyMatch regex literal.toString } } } @@ -29,7 +29,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a word char" in { val gen: Gen[String] = - ASTProcessor.apply(WordChar) + ASTProcessor.apply(CharacterClass.Word) forAll(gen) { str => @@ -40,7 +40,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a digit char" in { val gen: Gen[String] = - ASTProcessor.apply(DigitChar) + ASTProcessor.apply(CharacterClass.Digit) forAll(gen) { str => @@ -51,7 +51,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a space char" in { val gen: Gen[String] = - ASTProcessor.apply(SpaceChar) + ASTProcessor.apply(CharacterClass.Space) forAll(gen) { str => @@ -63,7 +63,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate any char" in { val gen: Gen[String] = - ASTProcessor.apply(AnyChar) + ASTProcessor.apply(CharacterClass.Any) forAll(gen) { str => @@ -73,9 +73,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate something within a group" in { - val strGen: Gen[String] = Gen.alphaNumChar.map(_.toString) - - forAll(strGen) { + forAll(Gen.alphaNumChar) { literal => val gen: Gen[String] = @@ -83,24 +81,22 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { forAll(gen) { str => - str must fullyMatch regex literal + str must fullyMatch regex s"($literal)" } } } "generate something within a non capturing group" in { - val strGen: Gen[String] = Gen.alphaNumChar.map(_.toString) - - forAll(strGen) { + forAll(Gen.alphaNumChar) { literal => val gen: Gen[String] = - ASTProcessor.apply(NonCapturingGroup(Literal(literal))) + ASTProcessor.apply(Group(Literal(literal), capturing = false)) forAll(gen) { str => - str must fullyMatch regex literal + str must fullyMatch regex s"(?:$literal)" } } } @@ -108,7 +104,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate something from an alternation" in { val gen: Gen[String] = - ASTProcessor.apply(Or(Literal("a"), Literal("b"))) + ASTProcessor.apply(Or(Literal('a'), Literal('b'))) forAll(gen) { str => @@ -119,7 +115,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate multiple terms" in { val gen: Gen[String] = - ASTProcessor.apply(And(WordChar, DigitChar)) + ASTProcessor.apply(And(CharacterClass.Word, CharacterClass.Digit)) forAll(gen) { str => @@ -131,7 +127,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate an optional term" in { val gen: Gen[String] = - ASTProcessor.apply(Optional(WordChar)) + ASTProcessor.apply(Optional(CharacterClass.Word)) forAll(gen) { str => @@ -143,7 +139,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate one or more of a term" in { val gen: Gen[String] = - ASTProcessor.apply(OneOrMore(WordChar)) + ASTProcessor.apply(OneOrMore(CharacterClass.Word)) forAll(gen) { str => @@ -155,7 +151,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate zero or more of a term" in { val gen: Gen[String] = - ASTProcessor.apply(ZeroOrMore(WordChar)) + ASTProcessor.apply(ZeroOrMore(CharacterClass.Word)) forAll(gen) { str => @@ -166,7 +162,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a range of terms" in { val gen: Gen[String] = - ASTProcessor.apply(Range(WordChar, 3, 33)) + ASTProcessor.apply(Range(CharacterClass.Word, 3, 33)) forAll(gen) { str => @@ -177,7 +173,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a range of terms with no max" in { val gen: Gen[String] = - ASTProcessor.apply(RangeFrom(WordChar, 3)) + ASTProcessor.apply(RangeFrom(CharacterClass.Word, 3)) forAll(gen) { str => @@ -188,7 +184,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a specific number of terms" in { val gen: Gen[String] = - ASTProcessor.apply(Length(WordChar, 33)) + ASTProcessor.apply(Length(CharacterClass.Word, 33)) forAll(gen) { str => @@ -199,7 +195,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a negated word char" in { val gen: Gen[String] = - ASTProcessor.apply(Negated(WordChar)) + ASTProcessor.apply(CharacterClass.Negated(CharacterClass.Word)) forAll(gen) { str => @@ -210,7 +206,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a negated space char" in { val gen: Gen[String] = - ASTProcessor.apply(Negated(SpaceChar)) + ASTProcessor.apply(CharacterClass.Negated(CharacterClass.Space)) forAll(gen) { str => @@ -221,7 +217,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a negated digit char" in { val gen: Gen[String] = - ASTProcessor.apply(Negated(DigitChar)) + ASTProcessor.apply(CharacterClass.Negated(CharacterClass.Digit)) forAll(gen) { str => @@ -229,10 +225,10 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { } } - "generate an empty string for BOS anchor" in { + "generate an empty string for beginning of string anchor" in { val gen: Gen[String] = - ASTProcessor.apply(BOS) + ASTProcessor.apply(Start) forAll(gen) { str => @@ -240,10 +236,10 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { } } - "generate an empty string for EOS anchor" in { + "generate an empty string for end of string anchor" in { val gen: Gen[String] = - ASTProcessor.apply(EOS) + ASTProcessor.apply(End) forAll(gen) { str => @@ -264,17 +260,15 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a character class with a literal" in { - val strGen: Gen[String] = Gen.alphaNumChar.map(_.toString) - - forAll(strGen) { + forAll(Gen.alphaNumChar) { literal => val gen: Gen[String] = - ASTProcessor.apply(CharacterClass(CharacterClass.Literal(literal))) + ASTProcessor.apply(CharacterClass.Group(CharacterClass.Literal(literal))) forAll(gen) { str => - str must fullyMatch regex literal + str must fullyMatch regex s"[$literal]" } } } @@ -282,7 +276,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a character class which behaves like an alternation" in { val gen: Gen[String] = - ASTProcessor.apply(CharacterClass(CharacterClass.Literal("a"), CharacterClass.Literal("b"))) + ASTProcessor.apply(CharacterClass.Group(CharacterClass.Literal('a'), CharacterClass.Literal('b'))) forAll(gen) { str => @@ -294,7 +288,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a character class with a word char" in { val gen: Gen[String] = - ASTProcessor.apply(CharacterClass(CharacterClass.WordChar)) + ASTProcessor.apply(CharacterClass.Group(CharacterClass.Word)) forAll(gen) { str => @@ -306,7 +300,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a character class with a digit char" in { val gen: Gen[String] = - ASTProcessor.apply(CharacterClass(CharacterClass.DigitChar)) + ASTProcessor.apply(CharacterClass.Group(CharacterClass.Digit)) forAll(gen) { str => @@ -318,7 +312,7 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { "generate a character class with a space char" in { val gen: Gen[String] = - ASTProcessor.apply(CharacterClass(CharacterClass.SpaceChar)) + ASTProcessor.apply(CharacterClass.Group(CharacterClass.Space)) forAll(gen) { str => @@ -327,47 +321,13 @@ class ASTProcessorSpec extends WordSpec with MustMatchers with PropertyChecks { } } - "generate a character class with a lowercase char range" in { - - val genGen: Gen[(Char, Char, Gen[String])] = - for { - min <- Gen.choose('a', 'z') - max <- Gen.choose(min, 'z') - } yield (min, max, ASTProcessor.apply(CharacterClass(CharacterClass.CharRange(min, max)))) - - forAll(genGen) { - case (min, max, gen) => - forAll(gen) { - str => - str must fullyMatch regex s"[$min-$max]" - } - } - } - - "generate a character class with an uppercase char range" in { + "generate a character class with a range" in { val genGen: Gen[(Char, Char, Gen[String])] = for { - min <- Gen.choose('A', 'Z') - max <- Gen.choose(min, 'Z') - } yield (min, max, ASTProcessor.apply(CharacterClass(CharacterClass.CharRange(min, max)))) - - forAll(genGen) { - case (min, max, gen) => - forAll(gen) { - str => - str must fullyMatch regex s"[$min-$max]" - } - } - } - - "generate a character class with a digit range" in { - - val genGen: Gen[(Int, Int, Gen[String])] = - for { - min <- Gen.choose(0, 9) - max <- Gen.choose(min, 9) - } yield (min, max, ASTProcessor.apply(CharacterClass(CharacterClass.DigitRange(min, max)))) + min <- Gen.choose('0', '9') + max <- Gen.choose(min, '9') + } yield (min, max, ASTProcessor.apply(CharacterClass.Group(CharacterClass.Range(min, max)))) forAll(genGen) { case (min, max, gen) => diff --git a/src/test/scala/wolfendale/scalacheck/regexp/GenParserSpec.scala b/src/test/scala/wolfendale/scalacheck/regexp/GenParserSpec.scala index d319742..12c7412 100644 --- a/src/test/scala/wolfendale/scalacheck/regexp/GenParserSpec.scala +++ b/src/test/scala/wolfendale/scalacheck/regexp/GenParserSpec.scala @@ -3,55 +3,52 @@ package wolfendale.scalacheck.regexp import org.scalacheck.{Gen, Shrink} import org.scalatest.prop.PropertyChecks import org.scalatest.{MustMatchers, WordSpec} -import ast._ +import wolfendale.scalacheck.regexp.ast._ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { - implicit val noShrinkString: Shrink[String] = Shrink.shrinkAny - implicit def noShrinkSeq[A]: Shrink[Seq[A]] = Shrink.shrinkAny + implicit def noShrink[A]: Shrink[A] = Shrink.shrinkAny ".parse" must { - val neAlphaNum: Gen[String] = { - Gen - .nonEmptyListOf(Gen.alphaNumChar) - .map(_.mkString("")) - } - - val alphaNumSpaces: Gen[String] = { - Gen - .nonEmptyListOf(Gen.oneOf(Gen.alphaNumChar, Gen.const(" "))) - .map(_.mkString("")) - } + val alphaNumAndSpace: Gen[Char] = + Gen.alphaChar "parse a literal" in { - forAll(alphaNumSpaces) { - str => - GenParser.parse(str) mustEqual Literal(str) + forAll(alphaNumAndSpace) { + char => + GenParser.parse(s"$char") mustEqual Literal(char) } } - Seq("\\", ")", "(", "$", "[", ".", "+", "*", "|", "?", "\\w", "\\d", "\\s").foreach { + Seq('\\', ')', '(', '$', '[', '.', '+', '*', '|', '?').foreach { //, "\\w", "\\d", "\\s").foreach { meta => s"parse an escaped `$meta`" in { GenParser.parse(s"\\$meta") mustEqual Literal(meta) } } + Seq('w', 'd', 's', 'W', 'D', 'S').foreach { + meta => + s"parse an escaped `\\$meta`" in { + GenParser.parse(s"\\\\$meta") mustEqual Literal(meta) + } + } + "parse a word char" in { - GenParser.parse("\\w") mustEqual WordChar + GenParser.parse("\\w") mustEqual CharacterClass.Word } "parse a digit char" in { - GenParser.parse("\\d") mustEqual DigitChar + GenParser.parse("\\d") mustEqual CharacterClass.Digit } "parse a space char" in { - GenParser.parse("\\s") mustEqual SpaceChar + GenParser.parse("\\s") mustEqual CharacterClass.Space } "parse a wildcard" in { - GenParser.parse(".") mustEqual AnyChar + GenParser.parse(".") mustEqual CharacterClass.Any } "parse a literal character from a character class" in { @@ -61,51 +58,23 @@ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { forAll(gen) { a => - GenParser.parse(s"[${a.mkString("")}]") mustEqual CharacterClass( - a.map(c => CharacterClass.Literal(c.toString)): _* + GenParser.parse(s"[${a.mkString("")}]") mustEqual CharacterClass.Group( + a.map(c => CharacterClass.Literal(c)): _* ) } } - "parse a digit range" in { - - val gen: Gen[(Int, Int)] = - for { - min <- Gen.chooseNum(0, 9) - max <- Gen.chooseNum(min, 9) - } yield (min, max) - - forAll(gen) { - case (min, max) => - GenParser.parse(s"[$min-$max]") mustEqual CharacterClass(CharacterClass.DigitRange(min, max)) - } - } - - "fail to parse an out of order digit range" ignore { - - val gen: Gen[(Int, Int)] = - for { - min <- Gen.chooseNum(1, 9) - max <- Gen.chooseNum(0, min) - } yield (min, max) - - forAll(gen) { - case (min, max) => - an[Exception] mustBe thrownBy(GenParser.parse(s"[$min-$max]")) - } - } - - "parse a lowercase char range" in { + "parse a char range" in { val gen: Gen[(Char, Char)] = for { - min <- Gen.alphaLowerChar - max <- Gen.choose(min, 'z') + min <- Gen.choose(Char.MinValue, Char.MaxValue) + max <- Gen.choose(min, Char.MaxValue) } yield (min, max) forAll(gen) { case (min, max) => - GenParser.parse(s"[$min-$max]") mustEqual CharacterClass(CharacterClass.CharRange(min, max)) + GenParser.parse(s"[$min-$max]") mustEqual CharacterClass.Group(CharacterClass.Range(min, max)) } } @@ -113,8 +82,8 @@ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { val gen: Gen[(Char, Char)] = for { - min <- Gen.choose('b', 'z') - max <- Gen.choose('a', min) + min <- Gen.choose((Char.MinValue + 1).toChar, Char.MaxValue) + max <- Gen.choose(Char.MinValue, min) } yield (min, max) forAll(gen) { @@ -123,152 +92,138 @@ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { } } - "parse an uppercase char range" in { - - val gen: Gen[(Char, Char)] = - for { - min <- Gen.alphaUpperChar - max <- Gen.choose(min, 'Z') - } yield (min, max) - - forAll(gen) { - case (min, max) => - GenParser.parse(s"[$min-$max]") mustEqual CharacterClass(CharacterClass.CharRange(min, max)) - } - } - "parse a word char in a character class" in { - GenParser.parse("[\\w]") mustEqual CharacterClass(CharacterClass.WordChar) + GenParser.parse("[\\w]") mustEqual CharacterClass.Group(CharacterClass.Word) } "parse a digit char in a character class" in { - GenParser.parse("[\\d]") mustEqual CharacterClass(CharacterClass.DigitChar) + GenParser.parse("[\\d]") mustEqual CharacterClass.Group(CharacterClass.Digit) } "parse a space char in a character class" in { - GenParser.parse("[\\s]") mustEqual CharacterClass(CharacterClass.SpaceChar) + GenParser.parse("[\\s]") mustEqual CharacterClass.Group(CharacterClass.Space) } - Seq("\\", "]").foreach { + Seq('\\', ']').foreach { meta => s"parse an escaped `$meta` in a character class" in { - GenParser.parse(s"[\\$meta]") mustEqual CharacterClass(CharacterClass.Literal(meta)) + GenParser.parse(s"[\\$meta]") mustEqual CharacterClass.Group(CharacterClass.Literal(meta)) } } - Seq("w", "d", "s").foreach { + Seq('w', 'd', 's').foreach { meta => s"parse an escaped `\\$meta` in a character class" in { - GenParser.parse(s"[\\\\$meta]") mustEqual CharacterClass(CharacterClass.Literal("\\"), CharacterClass.Literal(meta)) + GenParser.parse(s"[\\\\$meta]") mustEqual CharacterClass.Group(CharacterClass.Literal(meta)) } } "parse alternates of multiple terms" in { - forAll(neAlphaNum, neAlphaNum, neAlphaNum) { + forAll(alphaNumAndSpace, alphaNumAndSpace, alphaNumAndSpace) { case (a, b, c) => - GenParser.parse(s"$a|$b|$c") mustEqual Literal(a) | Literal(b) | Literal(c) + GenParser.parse(s"$a|$b|$c") mustEqual Or(Or(Literal(a), Literal(b)), Literal(c)) } } "parse a group" in { - forAll(neAlphaNum) { + forAll(alphaNumAndSpace) { a => GenParser.parse(s"(($a))") mustEqual Group(Group(Literal(a))) } } "parse a non-capturing group" in { - forAll(neAlphaNum) { + forAll(alphaNumAndSpace) { a => - GenParser.parse(s"(?:(?:$a))") mustEqual NonCapturingGroup(NonCapturingGroup(Literal(a))) + GenParser.parse(s"(?:(?:$a))") mustEqual Group(Group(Literal(a), capturing = false), capturing = false) } } "parse a group with an alternate in it" in { - forAll(neAlphaNum, neAlphaNum, neAlphaNum, neAlphaNum) { + forAll(alphaNumAndSpace, alphaNumAndSpace, alphaNumAndSpace, alphaNumAndSpace) { case (a, b, c, d) => - GenParser.parse(s"$a($b|$c)$d") mustEqual And(And(Literal(a), Group(Literal(b) | Literal(c))), Literal(d)) + GenParser.parse(s"$a($b|$c)$d") mustEqual And(Literal(a), Group(Or(Literal(b), Literal(c)), Some(Literal(d)))) } } "parse a nested group with an expression to the right" in { - forAll(neAlphaNum, neAlphaNum) { + forAll(alphaNumAndSpace, alphaNumAndSpace) { case (a, b) => - GenParser.parse(s"(($a)$b)") mustEqual Group(And(Group(Literal(a)), Literal(b))) + GenParser.parse(s"(($a)$b)") mustEqual Group(Group(Literal(a), Some(Literal(b)))) } } "parse a nested group with an expression to the left" in { - forAll(neAlphaNum, neAlphaNum) { + forAll(alphaNumAndSpace, alphaNumAndSpace) { case (a, b) => GenParser.parse(s"($a($b))") mustEqual Group(And(Literal(a), Group(Literal(b)))) } } "parse an optional literal" in { - GenParser.parse("a?") mustEqual Optional(Literal("a")) + GenParser.parse("a?") mustEqual Optional(Literal('a')) } "parse an optional literal (bind to quantifier rather than surrounding literals)" in { - GenParser.parse("abc?") mustEqual And(Literal("ab"), Optional(Literal("c"))) + GenParser.parse("abc?") mustEqual And(And(Literal('a'), Literal('b')), Optional(Literal('c'))) } "parse an optional group" in { - GenParser.parse("(abc)?") mustEqual Optional(Group(Literal("abc"))) + GenParser.parse("(abc)?") mustEqual Optional(Group(And(And(Literal('a'), Literal('b')), Literal('c')))) } "parse an optional with an alternate" in { - GenParser.parse("ab|c?") mustEqual Or(Literal("ab"), Optional(Literal("c"))) + GenParser.parse("ab|c?") mustEqual Or(And(Literal('a'), Literal('b')), Optional(Literal('c'))) } "parse an optional with an alternate (left)" in { - GenParser.parse("ab?|c") mustEqual Or(And(Literal("a"), Optional(Literal("b"))), Literal("c")) + GenParser.parse("ab?|c") mustEqual Or(And(Literal('a'), Optional(Literal('b'))), Literal('c')) } "parse a 'zeroOrMore' literal" in { - GenParser.parse("a*") mustEqual ZeroOrMore(Literal("a")) + GenParser.parse("a*") mustEqual ZeroOrMore(Literal('a')) } "parse a 'zeroOrMore' literal (bind to quantifier rather than surrounding literals)" in { - GenParser.parse("abc*") mustEqual And(Literal("ab"), ZeroOrMore(Literal("c"))) + GenParser.parse("abc*") mustEqual And(And(Literal('a'), Literal('b')), ZeroOrMore(Literal('c'))) } "parse a 'zeroOrMore' group" in { - GenParser.parse("(abc)*") mustEqual ZeroOrMore(Group(Literal("abc"))) + GenParser.parse("(abc)*") mustEqual ZeroOrMore(Group(And(And(Literal('a'), Literal('b')), Literal('c')))) } "parse a 'zeroOrMore' with an alternate" in { - GenParser.parse("ab|c*") mustEqual Or(Literal("ab"), ZeroOrMore(Literal("c"))) + GenParser.parse("ab|c*") mustEqual Or(And(Literal('a'), Literal('b')), ZeroOrMore(Literal('c'))) } "parse a 'zeroOrMore' with an alternate (left)" in { - GenParser.parse("ab*|c") mustEqual Or(And(Literal("a"), ZeroOrMore(Literal("b"))), Literal("c")) + GenParser.parse("ab*|c") mustEqual Or(And(Literal('a'), ZeroOrMore(Literal('b'))), Literal('c')) } "parse a 'oneOrMore' literal" in { - GenParser.parse("a+") mustEqual OneOrMore(Literal("a")) + GenParser.parse("a+") mustEqual OneOrMore(Literal('a')) } "parse a 'oneOrMore' literal (bind to quantifier rather than surrounding literals)" in { - GenParser.parse("abc+") mustEqual And(Literal("ab"), OneOrMore(Literal("c"))) + GenParser.parse("abc+") mustEqual And(And(Literal('a'), Literal('b')), OneOrMore(Literal('c'))) } "parse a 'oneOrMore' group" in { - GenParser.parse("(abc)+") mustEqual OneOrMore(Group(Literal("abc"))) + GenParser.parse("(abc)+") mustEqual OneOrMore(Group(And(And(Literal('a'), Literal('b')), Literal('c')))) } "parse a 'oneOrMore' with an alternate" in { - GenParser.parse("ab|c+") mustEqual Or(Literal("ab"), OneOrMore(Literal("c"))) + GenParser.parse("ab|c+") mustEqual Or(And(Literal('a'), Literal('b')), OneOrMore(Literal('c'))) } "parse a 'oneOrMore' with alternate (left)" in { - GenParser.parse("ab+|c") mustEqual Or(And(Literal("a"), OneOrMore(Literal("b"))), Literal("c")) + GenParser.parse("ab+|c") mustEqual Or(And(Literal('a'), OneOrMore(Literal('b'))), Literal('c')) } "parse a 'rangeFrom'" in { forAll(Gen.chooseNum(0, 100)) { min => - GenParser.parse(s"a{$min,}") mustEqual RangeFrom(Literal("a"), min) + GenParser.parse(s"a{$min,}") mustEqual RangeFrom(Literal('a'), min) } } @@ -282,7 +237,7 @@ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { forAll(gen) { case (min, max) => - GenParser.parse(s"a{$min,$max}") mustEqual Range(Literal("a"), min, max) + GenParser.parse(s"a{$min,$max}") mustEqual Range(Literal('a'), min, max) } } @@ -303,46 +258,70 @@ class GenParserSpec extends WordSpec with MustMatchers with PropertyChecks { "parse a specific length repeated pattern" in { forAll(Gen.choose(1, 100)) { length => - GenParser.parse(s"ab{$length}") mustEqual And(Literal("a"), Length(Literal("b"), length)) + GenParser.parse(s"ab{$length}") mustEqual And(Literal('a'), Length(Literal('b'), length)) } } "parse word boundaries" in { - GenParser.parse("\\bcat\\b") mustEqual And(And(WordBoundary, Literal("cat")), WordBoundary) + GenParser.parse("\\bcat\\b") mustEqual And(And(And(And(WordBoundary, Literal('c')), Literal('a')), Literal('t')), WordBoundary) } "parse string boundaries" in { - GenParser.parse("^cat$") mustEqual And(And(BOS, Literal("cat")), EOS) + GenParser.parse("^cat$") mustEqual And(And(And(And(Start, Literal('c')), Literal('a')), Literal('t')), End) } "parse substitutions" in { - GenParser.parse("(a)\\1") mustEqual And(Group(Literal("a")), Substitution(1)) + GenParser.parse("(a)\\1") mustEqual Group(Literal('a'), Some(Substitution(1))) } "parse a negated word char" in { - GenParser.parse("\\W") mustEqual Negated(WordChar) + GenParser.parse("\\W") mustEqual CharacterClass.Negated(CharacterClass.Word) } "parse a negated space char" in { - GenParser.parse("\\S") mustEqual Negated(SpaceChar) + GenParser.parse("\\S") mustEqual CharacterClass.Negated(CharacterClass.Space) } "parse a negated digit char" in { - GenParser.parse("\\D") mustEqual Negated(DigitChar) + GenParser.parse("\\D") mustEqual CharacterClass.Negated(CharacterClass.Digit) } "parse a negated word boundary" in { - GenParser.parse("\\B") mustEqual Negated(WordBoundary) + GenParser.parse("\\B") mustEqual NegatedWordBoundary } "parse a negated character class" in { - GenParser.parse("[^abc]") mustEqual Negated(CharacterClass(CharacterClass.Literal("a"), CharacterClass.Literal("b"), CharacterClass.Literal("c"))) + GenParser.parse("[^abc]") mustEqual CharacterClass.Negated(CharacterClass.Group(CharacterClass.Literal('a'), CharacterClass.Literal('b'), CharacterClass.Literal('c'))) } "parse input containing EOS character" in { - forAll(neAlphaNum) { a => - GenParser.parse(a + "$") mustEqual And(Literal(a), EOS) + forAll(alphaNumAndSpace) { a => + GenParser.parse(a + "$") mustEqual And(Literal(a), End) } } + + "parse a character class with intersections" in { + GenParser.parse("[a&&a]") mustEqual CharacterClass.Group(CharacterClass.Intersection(CharacterClass.Literal('a'), CharacterClass.Literal('a'))) + } + + "parse a character class with multiple intersections" in { + GenParser.parse("[a&&a&&a]") mustEqual CharacterClass.Group(CharacterClass.Intersection(CharacterClass.Literal('a'), CharacterClass.Intersection(CharacterClass.Literal('a'), CharacterClass.Literal('a')))) + } + + "parse a more complicated character class with intersections" in { + GenParser.parse("[\\w&&[abc]&&a]") mustEqual CharacterClass.Group( + CharacterClass.Intersection( + CharacterClass.Word, + CharacterClass.Intersection( + CharacterClass.Group( + CharacterClass.Literal('a'), + CharacterClass.Literal('b'), + CharacterClass.Literal('c') + ), + CharacterClass.Literal('a') + ) + ) + ) + } } }