gruhn
diff --git a/README.md b/README.md
Lines changed: 7 additions & 80 deletions
diff --git a/benchmark/aoc2023-day12.js b/benchmark/aoc2023-day12.js
Lines changed: 37 additions & 55 deletions
diff --git a/benchmark/demo.js b/benchmark/demo.js
Lines changed: 25 additions & 0 deletions
diff --git a/benchmark/filter-vs-intersection.js b/benchmark/filter-vs-intersection.js
Lines changed: 3 additions & 2 deletions
diff --git a/benchmark/parser-bench.js b/benchmark/parser-bench.js
Lines changed: 3 additions & 3 deletions
diff --git a/benchmark/toStdRegex_output_length.js b/benchmark/toStdRegex_output_length.js
Lines changed: 3 additions & 3 deletions
diff --git a/package.json b/package.json
Lines changed: 0 additions & 4 deletions
diff --git a/src/char-set.ts b/src/char-set.ts
Lines changed: 2 additions & 4 deletions
@@ -4,22 +4,19 @@ Zero-dependency TypeScript library for regex intersection, complement and other
 These are surprisingly hard to come by for any programming language.
 
 ```typescript
-import { intersection, size, enumerate } from '@gruhn/regex-utils'
+import { RB } from '@gruhn/regex-utils'
 
-// `intersection` combines multiple regex into one:
-const passwordRegex = intersection(
-  /^[a-zA-Z0-9]{12,32}$/, // 12-32 alphanumeric characters
-  /[0-9]/, // at least one number
-  /[A-Z]/, // at least one upper case letter   
-  /[a-z]/, // at least one lower case letter
-)
+const passwordRegex = RB(/^[a-zA-Z0-9]{12,32}$/) // 12-32 alphanumeric characters
+  .and(/[0-9]/) // at least one number
+  .and(/[A-Z]/) // at least one upper case letter   
+  .and(/[a-z]/) // at least one lower case letter
 
 // `size` calculates the number of strings matching the regex: 
-console.log(size(passwordRegex))
+console.log(passwordRegex.size())
 // 2301586451429392354821768871006991487961066695735482449920n
 
 // `enumerate` returns a stream of strings matching the regex:
-for (const sample of enumerate(passwordRegex).take(10)) {
+for (const sample of passwordRegex.enumerate().take(10)) {
   console.log(sample)
 }
 // aaaaaaaaaaA0
@@ -40,76 +37,6 @@ for (const sample of enumerate(passwordRegex).take(10)) {
 npm install @gruhn/regex-utils
 ```
 
-## High- vs. Low-Level API
-
-There is a high-level API and a low-level API:
-
- - [high-level API documentation](https://gruhn.github.io/regex-utils/modules/High-level_API.html)
- - [low-level API documentation](https://gruhn.github.io/regex-utils/modules/Low-Level_API.html)
-
-The high-level API operates directly on native JavaScript `RegExp` instances,
-which is more convenient but also requires parsing the regular expression.
-The low-level API operates on an internal representation
-which skips parsing step and is more efficient when combining multiple functions.
-For example, say you want to know how many strings match the intersection
-of two regular expressions:
-
-```typescript
-import { size, intersection } from '@gruhn/regex-utils'
-
-size(intersection(regex1, regex2))
-```
-
-This:
-1. parses the two input `RegExp`
-2. computes the intersection
-3. converts the result back to `RegExp`
-4. parses that again
-5. computes the size
-
-Step (1) should be fast for small handwritten regex.
-But the intersection of two regex can be quite large, 
-which can make step (3) and (4) quite costly.
-With the low-level API, step (3) and step (4) can be eliminated:
-
-```typescript
-import * as RE from '@gruhn/regex-utils/low-level-api'
-
-RE.size(
-  RE.toStdRegex(
-    RE.and(
-      RE.parse(regex1),
-      RE.parse(regex2)
-    )
-  )
-)
-```
-
-<!--
-
-## Todo Utilities
-
-* recognize regex prone to catastrophic backtracking
-  - https://www.regular-expressions.info/catastrophic.html
-  - https://www.youtube.com/watch?v=DDe-S3uef2w
-* check equivalence of two regex or find counterexample string
-
--->
-
-## Limitations
-
-The library implements a custom parser for regular expressions,
-so only a subset of the syntax is supported:
- - quantifiers: `*`, `+`, `?`, `{3,5}`, ...
- - alternation: `|`
- - character classes: `.`, `\w`, `[a-z]`, ...
- - optional start/end markers: `^` / `$` but only at the start/end
-   (technically they are allowed anywhere in the expression)
- - escaped meta characters: `\$`, `\.`, ...
- - (non-)capturing groups: `(...)`, `(?...)`
- - positive/negative lookahead: `(?!...)`, `(?=...)`
-Regex flags are not supported at all.
-
 ## References
 
 Heavily informed by these papers:
 
@@ -1,65 +1,59 @@
 import fs from 'fs'
-import * as RE from '../dist/low-level-api.js'
+import { RB } from '../dist/index.js'
+import { assert } from '../dist/utils.js'
 
 const input = fs.readFileSync('./benchmark/aoc2023-day12_input.txt', 'utf-8')
   .trim()
   .split('\n')
   .map(line => line.split(' '))
 
 /**
- * Maps pattern like "#?...##?#" to regex like `#(.|#)...##(.|#)#`
+ * Maps pattern like "#?...##?#" to regex like /^#(o|#)ooo##(o|#)#$/ (each '.' is encoded as 'o', each '?' as `o|#`)
  */
 function leftToRegex(pattern) {
   const inner = [...pattern].map(char => {
     switch (char) {
-      case '.': return RE.singleChar('.')
-      case '#': return RE.singleChar('#')
-      case '?': return RE.or([RE.singleChar('.'), RE.singleChar('#')])
+      case '.': return RB('o')
+      case '#': return RB('#')
+      case '?': return RB('o').or('#')
     }
     throw 'unknown symbol: ' + char
   })
-  return RE.seq(inner)
-}
-
-function interleave(array, sep) {
-  if (array.length <= 1) {
-    return array
-  } else {
-    const [ head, ...tail ] = array
-    return [head, sep, ...interleave(tail, sep) ]
-  }
+  return inner.reduce((acc, re) => acc.concat(re))
 }
 
 /**
- * Maps pattern like "2,4,3" to regex like `.*##.+####.+###.*`
+ * Maps pattern like "2,4,3" to regex like /^o*##o+####o+###o*$/ (one run of '#' per count, `o+` between runs, `o*` at both ends)
  */
 function rightToRegex(pattern) { 
-  const regexStartEnd = RE.repeat(RE.singleChar('.')) // .*
-  const regexBetween = RE.repeat(RE.singleChar('.'), { min: 1 }) // .+
-
-  const inner = pattern.split(',')
+  const [first, ...rest] = pattern.split(',')
     .map(digit => parseInt(digit))
-    .map(count => RE.repeat(RE.singleChar('#'), count))
+    .map(count => RB('#').repeat(count))
+
+  const start = RB('o').repeat() // o*
+  const end = RB('o').repeat() // o*
+  const separator = RB('o').repeat({ min: 1 }) // o+
 
-  return RE.seq([
-    regexStartEnd,
-    RE.seq(interleave(inner, regexBetween)),
-    regexStartEnd,
-  ])
+  let result = start.concat(first)
+  for (const item of rest) {
+    result = result.concat(separator).concat(item)
+  }
+  result = result.concat(end)
+
+  return result
 }
 
-function part1() {
+function solve(patternPairs) {
   const startTime = performance.now()
   let totalCount = 0n
 
-  input.forEach(([left, right], i) => {
+  patternPairs.forEach(([left, right], i) => {
     const leftRegex = leftToRegex(left)
     const rightRegex = rightToRegex(right)
 
-    // Compute intersection of the two regex: 
-    const intersection = RE.toStdRegex(RE.and([leftRegex, rightRegex]))
-    // And count the number of matching strings using `size`:
-    const count = RE.size(intersection)
+    // Compute intersection of the two regex and
+    // count the number of matching strings using `size`:
+    const count = RB(leftRegex).and(rightRegex).size()
 
     console.log(i, ':', count)
     totalCount += count
@@ -69,29 +63,17 @@ function part1() {
   return { totalCount, time }
 }
 
-function part2() {
-  const startTime = performance.now()
-  let totalCount = 0n
+const part1 = solve(input) 
+const part2 = solve(input.map(([left, right]) => [
+  Array(5).fill(left).join('?'),
+  Array(5).fill(right).join(',')
+]))
 
-  input.forEach(([left, right], i) => {
-    const leftRegex = leftToRegex(Array(5).fill(left).join('?'))
-    const rightRegex = rightToRegex(Array(5).fill(right).join(','))
-
-    // Compute intersection of the two regex: 
-    const intersection = RE.toStdRegex(RE.and([leftRegex, rightRegex]))
-    // And count the number of matching strings using `size`:
-    const count = RE.size(intersection)
-
-    console.log(i, ':', count)
-    totalCount += count
-  })
-
-  const time = performance.now() - startTime
-  return { time, totalCount }
-}
+// best time: 992ms
+console.log('Part 1:', part1.totalCount, `(time: ${Math.ceil(part1.time)}ms)`)
 
-const sol1 = part1() // best time:   992ms
-const sol2 = part2() // best time: 13182ms
+// best time: 11950ms
+console.log('Part 2:', part2.totalCount, `(time: ${Math.ceil(part2.time)}ms)`)
 
-console.log('Part 1:', sol1.totalCount, `(time: ${Math.ceil(sol1.time)}ms)`)
-console.log('Part 2:', sol2.totalCount, `(time: ${Math.ceil(sol2.time)}ms)`)
+assert(part1.totalCount === 7191n)
+assert(part2.totalCount === 6512849198636n)
@@ -0,0 +1,25 @@
+import { RB } from '../dist/index.js'
+
+const passwordRegex = RB(/^[a-zA-Z0-9]{12,32}$/) // 12-32 alphanumeric characters
+  .and(/[0-9]/) // at least one number
+  .and(/[A-Z]/) // at least one upper case letter   
+  .and(/[a-z]/) // at least one lower case letter
+
+// `size` calculates the number of strings matching the regex: 
+console.log(passwordRegex.size())
+// 2301586451429392354821768871006991487961066695735482449920n
+
+// `enumerate` returns a stream of strings matching the regex:
+for (const sample of passwordRegex.enumerate().take(10)) {
+  console.log(sample)
+}
+// aaaaaaaaaaA0
+// aaaaaaaaaa0A
+// aaaaaaaaaAA0
+// aaaaaaaaaA00
+// aaaaaaaaaaA1
+// aaaaaaaaa00A
+// baaaaaaaaaA0
+// AAAAAAAAAA0a
+// aaaaaaaaaAA1
+// aaaaaaaaaa0B
@@ -1,11 +1,12 @@
 import fc from 'fast-check'
-import { intersection } from '../dist/index.js'
+import { RB } from '../dist/index.js'
 
 const emailRegex = /^[\w\-\.]+@([\w-]+\.)+[\w-]{2,}$/
 
 function runIntersection(sampleCount) {
   const startTime = performance.now()
-  fc.sample(fc.stringMatching(intersection(/^.{3,10}$/, emailRegex)), sampleCount)
+  const intersection = RB(emailRegex).and(/^.{3,10}$/).toRegExp()
+  fc.sample(fc.stringMatching(intersection), sampleCount)
   return performance.now() - startTime
 }
 
 
@@ -1,6 +1,6 @@
 import fs from 'fs'
 import { parseRegexString } from '../dist/regex-parser.js'
-import { enumerate } from '../dist/index.js'
+import { RB } from '../dist/index.js'
 
 export function* readDataset() {
   const jsonStr = fs.readFileSync('./benchmark/regex-dataset.json', 'utf-8')
@@ -24,9 +24,9 @@ for (const { regex, flags } of readDataset()) {
   try {
     const time = performance.now()
     // parseRegexString(regex)
-    const regexp = new RegExp(regex, flags)
+    const regexp = RB(new RegExp(regex, flags))
     console.log('====', regexp, '====')
-    for (const word of enumerate(regexp).take(10)) {
+    for (const word of RB(regexp).enumerate().take(10)) {
       console.log(JSON.stringify(word))
     }
     console.log(`time: ${Math.round(performance.now() - time)}ms`)
 
@@ -1,8 +1,8 @@
 import fc from 'fast-check'
 import * as RE from '../dist/regex.js'
 import { ParseError } from '../dist/parser.js'
-import { UnsupportedSyntaxError } from '../dist/regex-parser.js'
-import { parse, toStdRegex } from '../dist/low-level-api.js'
+import { UnsupportedSyntaxError, parseRegExp } from '../dist/regex-parser.js'
+import { toStdRegex } from '../dist/dfa.js'
 import randomRegexDataset from './regex_random_unique_no-nested-star_1000.js'
 import handwrittenRegexDataset from './regex_handwritten.js'
 
@@ -17,7 +17,7 @@ function run(inputRegExp, index) {
   console.log('#' + index, inputRegExp)
   const startTime = performance.now()
 
-  const inputRegex = parse(inputRegExp)
+  const inputRegex = parseRegExp(inputRegExp)
   const outputRegex = toStdRegex(inputRegex)
   const outputRegExp = RE.toRegExp(outputRegex)
 
 
@@ -17,10 +17,6 @@
     ".": {
       "import": "./dist/index.js",
       "types": "./dist/index.d.ts"
-    },
-    "./low-level-api": {
-      "import": "./dist/low-level-api.js",
-      "types": "./dist/low-level-api.d.ts"
     }
   },
   "files": [
 
@@ -1,11 +1,9 @@
-import { adjacentPairs, assert, checkedAllCases, hashNums, hashStr, xor, zip } from './utils'
+import { assert, checkedAllCases, hashStr, xor } from './utils'
 import * as Range from './code-point-range'
 import * as Stream from './stream'
 
 type WithHash<T> = T & { hash: number }
 
-type EmptyCharSet = WithHash<{ type: 'empty' }>
-
 // TODO: ensure tree is balanced
 type CharSetWithoutHash =
   | { type: 'empty' }
@@ -202,7 +200,7 @@ export function deleteRange(set: CharSet, range: Range.CodePointRange): CharSet
   } else if (set.type === 'empty') {
     return empty
   } else if (set.type === 'node') {
-    const [rangeBeforeStart, rangeRest1] = Range.splitAt(set.range.start-1, range)
+    const [rangeBeforeStart, _rangeRest1] = Range.splitAt(set.range.start-1, range)
     const [rangeRest2, rangeAfterEnd] = Range.splitAt(set.range.end, range)
 
     const newLeft = deleteRange(set.left, rangeBeforeStart)