Skip to content

Commit 541b579

Browse files
committed
test: Arbitrary for RegExpAST
1 parent 2ce7b0e commit 541b579

File tree

5 files changed

+225
-18
lines changed

5 files changed

+225
-18
lines changed

src/ast.ts

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -149,33 +149,36 @@ function captureGroupToString(name: string | undefined, inner: RegExpAST, option
149149
}
150150

151151
export function debugShow(ast: RegExpAST): unknown {
152+
return JSON.stringify(debugShow_(ast), null, 2)
153+
}
154+
function debugShow_(ast: RegExpAST): unknown {
152155
switch (ast.type) {
153156
case 'epsilon':
154157
return '';
155158
case 'start-marker':
156-
return { type: 'start-marker', left: debugShow(ast.left), right: debugShow(ast.right) }
159+
return { type: 'start-marker', left: debugShow_(ast.left), right: debugShow_(ast.right) }
157160
case 'end-marker':
158-
return { type: 'end-marker', left: debugShow(ast.left), right: debugShow(ast.right) }
161+
return { type: 'end-marker', left: debugShow_(ast.left), right: debugShow_(ast.right) }
159162
case 'literal':
160163
return CharSet.toString(ast.charset)
161164
case 'concat':
162-
return { type: 'concat', left: debugShow(ast.left), right: debugShow(ast.right) }
165+
return { type: 'concat', left: debugShow_(ast.left), right: debugShow_(ast.right) }
163166
case 'union':
164-
return { type: 'union', left: debugShow(ast.left), right: debugShow(ast.right) }
167+
return { type: 'union', left: debugShow_(ast.left), right: debugShow_(ast.right) }
165168
case 'star':
166-
return { type: 'star', inner: debugShow(ast.inner) }
169+
return { type: 'star', inner: debugShow_(ast.inner) }
167170
case 'plus':
168-
return { type: 'plus', inner: debugShow(ast.inner) }
171+
return { type: 'plus', inner: debugShow_(ast.inner) }
169172
case 'optional':
170-
return { type: 'optional', inner: debugShow(ast.inner) }
173+
return { type: 'optional', inner: debugShow_(ast.inner) }
171174
case 'repeat':
172-
return { type: 'repeat', inner: debugShow(ast.inner), bounds: ast.bounds }
175+
return { type: 'repeat', inner: debugShow_(ast.inner), bounds: ast.bounds }
173176
case 'capture-group':
174-
return { type: 'capture-group', name: ast.name, inner: debugShow(ast.inner) }
177+
return { type: 'capture-group', name: ast.name, inner: debugShow_(ast.inner) }
175178
case 'positive-lookahead':
176-
return { type: 'positive-lookahead', inner: debugShow(ast.inner) }
179+
return { type: 'positive-lookahead', inner: debugShow_(ast.inner) }
177180
case 'negative-lookahead':
178-
return { type: 'negative-lookahead', inner: debugShow(ast.inner) }
181+
return { type: 'negative-lookahead', inner: debugShow_(ast.inner) }
179182
}
180183
checkedAllCases(ast)
181184
}

test/arbitrary-ast.spec.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import fc from "fast-check"
2+
import { it, describe } from "node:test"
3+
import assert from "node:assert"
4+
import * as Arbitrary from './arbitrary-ast'
5+
import * as AST from '../src/ast'
6+
7+
describe('regexpAST', () => {
8+
it('only generates valid regexp', () => {
9+
fc.assert(
10+
fc.property(
11+
Arbitrary.regexpAST(),
12+
(ast) => {
13+
const regexpStr = AST.toString(ast, { useNonCapturingGroups: true })
14+
assert.doesNotThrow(() => new RegExp(regexpStr))
15+
}
16+
),
17+
)
18+
})
19+
})

test/arbitrary-ast.ts

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import fc from 'fast-check'
2+
import * as AST from '../src/ast'
3+
import * as CharSet from '../src/char-set'
4+
import { checkedAllCases } from 'src/utils'
5+
6+
export function charSet(): fc.Arbitrary<CharSet.CharSet> {
7+
return fc.constantFrom('a', 'b', 'c', 'd', 'e', 'f')
8+
.map(CharSet.singleton)
9+
}
10+
11+
export function repeatBounds(): fc.Arbitrary<AST.RepeatBounds> {
12+
return fc.oneof(
13+
fc.nat({ max: 10 }),
14+
fc.record({ min: fc.nat({ max: 10 }) }),
15+
fc.record({ max: fc.nat({ max: 10 }) }),
16+
fc.record({ min: fc.nat({ max: 5 }), max: fc.integer({ min: 5, max: 10 }) })
17+
)
18+
}
19+
20+
export function captureName(): fc.Arbitrary<string | undefined> {
21+
return fc.option(fc.stringMatching(/^[a-zA-Z_]\w{0,8}$/))
22+
}
23+
24+
function epsilon(): fc.Arbitrary<AST.RegExpAST> {
25+
return fc.constant(AST.epsilon)
26+
}
27+
28+
function literal(): fc.Arbitrary<AST.RegExpAST> {
29+
return charSet().map(AST.literal)
30+
}
31+
32+
function concat(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
33+
return fc.tuple(childArb(), childArb())
34+
.map(([left, right]) => AST.concat(left, right))
35+
}
36+
37+
function union(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
38+
return fc.tuple(childArb(), childArb())
39+
.map(([left, right]) => AST.union(left, right))
40+
}
41+
42+
function star(innerArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
43+
return innerArb().map(AST.star)
44+
}
45+
46+
function plus(innerArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
47+
return innerArb().map(AST.plus)
48+
}
49+
50+
function optional(innerArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
51+
return innerArb().map(AST.optional)
52+
}
53+
54+
function repeat(innerArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
55+
return fc.tuple(innerArb(), repeatBounds())
56+
.map(([inner, bounds]) => AST.repeat(inner, bounds))
57+
}
58+
59+
function captureGroup(innerArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
60+
return fc.tuple(innerArb(), captureName())
61+
.map(([inner, name]) => AST.captureGroup(inner, name))
62+
}
63+
64+
function positiveLookahead(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
65+
return fc.tuple(childArb(), childArb())
66+
.map(([inner, right]) => AST.positiveLookahead(inner, right))
67+
}
68+
69+
function negativeLookahead(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
70+
return fc.tuple(childArb(), childArb())
71+
.map(([inner, right]) => AST.negativeLookahead(inner, right))
72+
}
73+
74+
function startMarker(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
75+
return fc.tuple(childArb(), childArb())
76+
.map(([left, right]) => AST.startMarker(left, right))
77+
}
78+
79+
function endMarker(childArb: () => fc.Arbitrary<AST.RegExpAST>): fc.Arbitrary<AST.RegExpAST> {
80+
return fc.tuple(childArb(), childArb())
81+
.map(([left, right]) => AST.endMarker(left, right))
82+
}
83+
84+
/**
85+
* Traverses the AST and renames named capture groups whose name already occurs in the expression.
86+
* `new RegExp(...)` throws an error when capture group names occur multiple times in the
87+
* same expression.
88+
*/
89+
export function makeCaptureGroupNamesUnique(ast: AST.RegExpAST): AST.RegExpAST {
90+
const seenNames = new Map<string, number>()
91+
92+
function renameIfSeen(name: string) {
93+
const counter = seenNames.get(name)
94+
if (counter === undefined) {
95+
seenNames.set(name, 1)
96+
return name
97+
} else {
98+
const newName = `${name}_${counter+1}`
99+
return renameIfSeen(newName)
100+
}
101+
}
102+
103+
function traverse(node: AST.RegExpAST): AST.RegExpAST {
104+
switch (node.type) {
105+
case 'epsilon':
106+
return node
107+
case 'literal':
108+
return node
109+
case 'concat':
110+
return AST.concat(traverse(node.left), traverse(node.right))
111+
case 'union':
112+
return AST.union(traverse(node.left), traverse(node.right))
113+
case 'star':
114+
return AST.star(traverse(node.inner))
115+
case 'plus':
116+
return AST.plus(traverse(node.inner))
117+
case 'optional':
118+
return AST.optional(traverse(node.inner))
119+
case 'repeat':
120+
return AST.repeat(traverse(node.inner), node.bounds)
121+
case 'capture-group': {
122+
const innerProcessed = traverse(node.inner)
123+
124+
if (node.name === undefined) {
125+
return AST.captureGroup(innerProcessed, node.name)
126+
} else {
127+
const nameProcessed = renameIfSeen(node.name)
128+
return AST.captureGroup(innerProcessed, nameProcessed)
129+
}
130+
}
131+
case 'positive-lookahead':
132+
return AST.positiveLookahead(traverse(node.inner), traverse(node.right))
133+
case 'negative-lookahead':
134+
return AST.negativeLookahead(traverse(node.inner), traverse(node.right))
135+
case 'start-marker':
136+
return AST.startMarker(traverse(node.left), traverse(node.right))
137+
case 'end-marker':
138+
return AST.endMarker(traverse(node.left), traverse(node.right))
139+
default:
140+
checkedAllCases(node)
141+
}
142+
}
143+
144+
return traverse(ast)
145+
}
146+
147+
export function regexpAST(size = 20): fc.Arbitrary<AST.RegExpAST> {
148+
return regexpAST_(size).map(makeCaptureGroupNamesUnique)
149+
}
150+
function regexpAST_(size: number): fc.Arbitrary<AST.RegExpAST> {
151+
if (size <= 1) {
152+
return fc.oneof(
153+
epsilon(),
154+
literal()
155+
)
156+
} else {
157+
const childSize = Math.floor(size / 2)
158+
return fc.oneof(
159+
{ arbitrary: epsilon(), weight: 1 },
160+
{ arbitrary: literal(), weight: 5 },
161+
{ arbitrary: concat(() => regexpAST_(childSize)), weight: 3 },
162+
{ arbitrary: union(() => regexpAST_(childSize)), weight: 3 },
163+
{ arbitrary: star(() => regexpAST_(childSize)), weight: 1 },
164+
{ arbitrary: plus(() => regexpAST_(childSize)), weight: 1 },
165+
{ arbitrary: optional(() => regexpAST_(childSize)), weight: 1 },
166+
{ arbitrary: repeat(() => regexpAST_(childSize)), weight: 1 },
167+
{ arbitrary: captureGroup(() => regexpAST_(childSize)), weight: 2 },
168+
{ arbitrary: positiveLookahead(() => regexpAST_(childSize)), weight: 1 },
169+
{ arbitrary: negativeLookahead(() => regexpAST_(childSize)), weight: 1 },
170+
{ arbitrary: startMarker(() => regexpAST_(childSize)), weight: 1 },
171+
{ arbitrary: endMarker(() => regexpAST_(childSize)), weight: 1 }
172+
)
173+
}
174+
}
175+
176+
export function regexp(size?: number): fc.Arbitrary<RegExp> {
177+
return regexpAST(size).map(ast => new RegExp(AST.toString(ast, { useNonCapturingGroups: true })))
178+
}

test/arbitrary-regex.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,17 @@ function star(innerArb: () => fc.Arbitrary<RE.StdRegex>): fc.Arbitrary<RE.StdReg
3434
}
3535

3636
export function stdRegex(size = 100): fc.Arbitrary<RE.StdRegex> {
37-
if (size <= 0)
37+
if (size <= 1) {
3838
return literal()
39-
else
39+
} else {
40+
const childSize = Math.floor(size / 2)
4041
return fc.oneof(
4142
{ arbitrary: literal(), weight: 5 },
42-
{ arbitrary: concat(() => stdRegex(Math.floor(size/2))), weight: 3 },
43-
{ arbitrary: union(() => stdRegex(Math.floor(size/2))), weight: 3 },
44-
{ arbitrary: star(() => stdRegex(Math.floor(size/2))), weight: 1 },
43+
{ arbitrary: concat(() => stdRegex(childSize)), weight: 3 },
44+
{ arbitrary: union(() => stdRegex(childSize)), weight: 3 },
45+
{ arbitrary: star(() => stdRegex(childSize)), weight: 1 },
4546
)
47+
}
4648
}
4749

4850
export function stdRegexString(): fc.Arbitrary<string> {

test/regex-parser.spec.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,15 @@ describe('parseRegExp', () => {
9999
'a+*',
100100
// invalid capture group names:
101101
'(?<1abc>.)',
102-
// FIXME:
102+
103+
// TODO: duplicate capture group name:
104+
// '(?<abc>.)(?<abc>.)',
105+
106+
// TODO: quantifier applied directly to another quantifier:
103107
// 'a?{2}',
104108
// 'a+{2}',
105-
// FIXME: invalid ranges:
109+
110+
// TODO: invalid ranges:
106111
// '[a-#]',
107112
// '[%-#]',
108113
]

0 commit comments

Comments
 (0)