Skip to content

Commit 5be9328

Browse files
committed
feat(parser): allow start/end anchor everywhere
Also expose `UnsupportedSyntaxError`.
1 parent 3d9f1dd commit 5be9328

16 files changed

+647
-357
lines changed

.github/workflows/deploy-docs.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,8 @@ jobs:
3535
uses: actions/setup-node@v4
3636
with:
3737
node-version: "lts/*"
38-
- name: Install dependencies
39-
run: npm clean-install
40-
- name: Generate Docs
41-
run: make docs
38+
- name: Install & Build
39+
run: make
4240
- name: Setup Pages
4341
uses: actions/configure-pages@v5
4442
- name: Upload artifact

.github/workflows/release.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,10 @@ jobs:
2525
uses: actions/setup-node@v4
2626
with:
2727
node-version: "lts/*"
28-
- name: Install dependencies
29-
run: npm clean-install
28+
- name: Install & Build
29+
run: make
3030
- name: Test
3131
run: npm test
32-
- name: Build
33-
run: make dist
3432
- name: Verify the integrity of provenance attestations and registry signatures for installed dependencies
3533
run: npm audit signatures
3634
- name: Release

benchmark/parser-bench.ts

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import fs from 'fs'
2-
import { parseRegexString } from '../dist/regex-parser.js'
3-
import { RB } from '../dist/index.js'
2+
import { parseRegExp } from '../dist/regex-parser.js'
43

54
export function* readDataset() {
65
const jsonStr = fs.readFileSync('./benchmark/regex-dataset.json', 'utf-8')
@@ -17,29 +16,37 @@ export function* readDataset() {
1716
}
1817
}
1918

20-
let hasError = 0
21-
let noError = 0
22-
let totalParseTime = 0
19+
const dataset = [...readDataset()]
2320

24-
for (const { regex, flags } of readDataset()) {
21+
let parseErrorCount = 0
22+
23+
for (const { regex, flags } of dataset) {
2524
try {
2625
// parseRegexString(regex)
2726
const regexp = new RegExp(regex, flags)
2827
console.log('====', regexp, '====')
2928

30-
const timeStart = performance.now()
31-
const parsed = RB(regexp)
32-
const timeEnd = performance.now()
29+
performance.mark('parse-start')
30+
const parsed = parseRegExp(regexp)
31+
performance.mark('parse-end')
32+
performance.measure('parse-duration', 'parse-start', 'parse-end')
3333

34-
console.log(`time: ${Math.round(timeEnd - timeStart)}ms`)
35-
totalParseTime += timeEnd - timeStart
36-
noError++
3734
} catch (e) {
38-
// console.error(new RegExp(regex, flags))
39-
hasError++
35+
console.error(new RegExp(regex, flags))
36+
parseErrorCount++
4037
}
4138
}
4239

43-
console.log('error ratio:', hasError, '/', hasError + noError)
40+
const totalParseTime = performance.getEntriesByName('parse-duration')
41+
.map(entry => entry.duration)
42+
.reduce((acc,d) => acc + d, 0)
43+
44+
const maxParseTime = performance.getEntriesByName('parse-duration')
45+
.map(entry => entry.duration)
46+
.reduce((acc,d) => Math.max(acc, d), -Infinity)
47+
48+
console.log('error ratio:', parseErrorCount, '/', dataset.length)
4449
console.log('total parse time:', Math.round(totalParseTime), 'ms')
50+
console.log('avg parse time:', Math.round(totalParseTime / (dataset.length - parseErrorCount)), 'ms')
51+
console.log('max parse time:', Math.round(maxParseTime), 'ms')
4552

benchmark/toStdRegex_output_length.ts

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
import fc from 'fast-check'
2-
import * as RE from '../dist/regex.js'
3-
import { ParseError } from '../dist/parser.js'
4-
import { UnsupportedSyntaxError, parseRegExp } from '../dist/regex-parser.js'
5-
import { toStdRegex } from '../dist/dfa.js'
6-
import randomRegexDataset from './regex_random_unique_no-nested-star_1000.js'
7-
import handwrittenRegexDataset from './regex_handwritten.js'
2+
import * as RE from '../src/regex'
3+
import { UnsupportedSyntaxError, ParseError } from '../src/index'
4+
import { parseRegExp } from '../src/regex-parser'
5+
import { toStdRegex } from '../src/dfa.js'
6+
import randomRegexDataset from './regex_random_unique_no-nested-star_1000'
7+
import handwrittenRegexDataset from './regex_handwritten'
88

99
const fullRegexDataset = [
1010
...randomRegexDataset,
1111
...handwrittenRegexDataset,
1212
]
1313

14-
const mults = []
14+
const mults: number[] = []
1515

16-
function run(inputRegExp, index) {
16+
function run(inputRegExp: RegExp, index: number) {
1717
console.log('#' + index, inputRegExp)
1818
const startTime = performance.now()
1919

20-
const inputRegex = parseRegExp(inputRegExp)
20+
const inputRegex = RE.fromRegExpAST(parseRegExp(inputRegExp))
2121
const outputRegex = toStdRegex(inputRegex)
2222
const outputRegExp = RE.toRegExp(outputRegex)
2323

@@ -28,7 +28,7 @@ function run(inputRegExp, index) {
2828

2929
console.log(`
3030
regex input length : ${inp}
31-
regex ouptut length : ${out}
31+
regex output length : ${out}
3232
multiplier : ${mult}
3333
`)
3434
}

equiv-checker.html

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,13 +295,14 @@ <h1>RegExp Equivalence Checker</h1>
295295
<div class="info">
296296
<h4>How to use:</h4>
297297
Enter two JavaScript regular expressions (without the surrounding slashes) and click "Check Equivalence".
298-
The tool will determine the relationship between the two expressions: equivalent, subset, superset, or neither.
298+
The tool determines whether the two expressions match exactly the same strings.
299+
Otherwise, example strings are shown that match either expression but not both.
299300

300301
<h4>Examples:</h4>
301302
<ul>
302-
<li><code>a*</code> and <code>a+</code> are not equivalent</li>
303-
<li><code>a|b</code> and <code>b|a</code> are equivalent</li>
304-
<li><code>(ab)+</code> and <code>ab(ab)*</code> are equivalent</li>
303+
<li><code>a|aa</code> and <code>a{1,2}</code> are equivalent</li>
304+
<li><code>a+</code> matches a subset of <code>a*</code></li>
305+
<li><code>^a{0,4}$</code> and <code>^a{2,6}$</code> match some common strings</li>
305306
</ul>
306307

307308
<h4>Supported syntax:</h4>

flake.nix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
buildInputs = with pkgs; [
1717
nodejs_23
1818
nodePackages.typescript-language-server
19+
ripgrep
1920
];
2021
};
2122
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"version": "0.0.0-development",
88
"type": "module",
99
"scripts": {
10-
"test": "node --test --import=tsx test/*.spec.ts",
10+
"test": "node --test --import=tsx",
1111
"build": "make",
1212
"serve": "http-server -p 8080",
1313
"bench": "0x --output-dir prof"

0 commit comments

Comments
 (0)