|
| 1 | +import { describe, it } from "node:test" |
| 2 | +import { strict as assert } from "node:assert" |
| 3 | +import * as RE from "../src/regex" |
| 4 | +import * as AST from "../src/ast" |
| 5 | +import { parseRegExp } from "../src/regex-parser" |
| 6 | + |
| 7 | +describe('toExtRegex', () => { |
| 8 | + |
| 9 | + const dotStar = RE.star(RE.anySingleChar) |
| 10 | + |
| 11 | + // function infix(regex: RE.ExtRegex) { |
| 12 | + // return RE.seq([ dotStar, regex, dotStar ]) |
| 13 | + // } |
| 14 | + |
/**
 * Wraps `regex` so that `dotStar` is appended after it, i.e. returns
 * `RE.concat(regex, dotStar)`. Used to spell out expectations for patterns
 * whose end is not pinned by an anchor.
 */
function prefix(regex: RE.ExtRegex) {
  const withTrailingDotStar = RE.concat(regex, dotStar)
  return withTrailingDotStar
}
| 18 | + |
/**
 * Wraps `regex` so that `dotStar` is prepended before it, i.e. returns
 * `RE.concat(dotStar, regex)`. Used to spell out expectations for patterns
 * whose start is not pinned by an anchor.
 */
function suffix(regex: RE.ExtRegex) {
  const withLeadingDotStar = RE.concat(dotStar, regex)
  return withLeadingDotStar
}
| 22 | + |
// Alternations containing one or more empty branches: each row pairs a
// source RegExp with the extended regex it is expected to convert to.
describe('union with empty members', () => {
  const cases = [
    [/^(|a)$/, RE.optional(RE.singleChar('a'))],
    [/^(a||)$/, RE.optional(RE.singleChar('a'))],
    [/^(|a|)$/, RE.optional(RE.singleChar('a'))],
    [/^(|)$/, RE.epsilon],
  ] as const

  // One test per row, titled with the RegExp's textual form; results are
  // compared through their `hash` properties.
  cases.forEach(([pattern, expected]) => {
    it(String(pattern), () => {
      const converted = AST.toExtRegex(parseRegExp(pattern))
      assert.equal(converted.hash, expected.hash)
    })
  })
})
| 38 | + |
// Checks how `^` / `$` anchors are eliminated when a parsed RegExp is
// converted with `AST.toExtRegex`. Each row pairs a source RegExp with the
// expected extended regex.
describe('start/end anchor elimination', () => {
  // Expectation shared by several rows below; built fresh on every call so
  // each row still gets its own value.
  const aOrAnythingThenB = () =>
    prefix(RE.union(RE.singleChar('a'), suffix(RE.singleChar('b'))))

  const cases = [
    [/^abc/, RE.seq([RE.string('abc'), dotStar])],

    // A start anchor that follows a consumed character is a contradiction,
    // so only the empty set can match:
    [/a^b/, RE.empty],
    [/^a^b/, RE.empty],

    // Two `^` directly next to each other are not contradictory:
    [/(^^a|b)/, aOrAnythingThenB()],

    // More generally, there is no contradiction as long as whatever sits
    // between two `^` can match epsilon:
    [/(^(c|)^a|b)/, aOrAnythingThenB()],
    [/(^c*^a|b)/, aOrAnythingThenB()],

    // A contradiction inside one union branch does NOT collapse the whole
    // expression to the empty set:
    [/(a^b|c)/, RE.seq([dotStar, RE.singleChar('c'), dotStar])],
    [/^(a^b|c)/, RE.seq([RE.singleChar('c'), dotStar])],

    // An end anchor before a start anchor with something in between is
    // contradictory and describes the empty set:
    [/$.^/, RE.empty],

    // With nothing between the end and start anchor, epsilon still matches:
    [/$^/, RE.epsilon],

    // Nullable expressions to the left and right can be ignored:
    [/(a?)$^(b*)/, RE.epsilon],

    [/(^a|)^b/, RE.seq([RE.singleChar('b'), dotStar])],
    [/^a(b^|c)/, RE.seq([RE.string('ac'), dotStar])],
    [
      /(^|a)b/,
      prefix(RE.concat(RE.optional(suffix(RE.singleChar('a'))), RE.singleChar('b'))),
    ],

    // FIXME:
    // [/(^)+a$/, RE.singleChar('a') ],
    [/(^)*a$/, suffix(RE.singleChar('a'))],
    [
      /(b|^)a$/,
      RE.concat(RE.optional(suffix(RE.singleChar('b'))), RE.singleChar('a')),
    ],
    [/a(^)/, RE.empty],
  ] as const

  // One test per row, titled with the RegExp's textual form; results are
  // compared through their `hash` properties.
  cases.forEach(([pattern, expected]) => {
    it(String(pattern), () => {
      const converted = AST.toExtRegex(parseRegExp(pattern))
      assert.equal(converted.hash, expected.hash)
    })
  })
})
| 81 | + |
// Checks that lookahead assertions are eliminated when a parsed RegExp is
// converted with `AST.toExtRegex`. Each row pairs a source RegExp with the
// expected extended regex.
describe('lookahead elimination', () => {
  const cases = [
    // Positive lookahead:
    [/^(?=a)a$/, RE.string('a')],
    [/^a(?=b)b$/, RE.string('ab')],
    [/^((?=a)a|(?=b)b)$/, RE.union(RE.string('a'), RE.string('b'))],
    [/^(?=[0-5])(?=[5-9])[3-7]$/, RE.string('5')],

    // Negative lookahead:
    [
      /^a(?!b)c$/,
      RE.concat(
        RE.string('a'),
        RE.intersection(RE.complement(RE.string('b')), RE.string('c')),
      ),
    ],

    // TODO: lookahead + lookbehind
    // [/^a(?=b)(?<=a)b$/, RE.string('ab')],
    // [/^b(?=ab)a(?<=ba)b$/, RE.string('bab')],
    // [/^a(?=b)(?<=a)(?!a)(?<!b)b$/, RE.string('ab')],
  ] as const

  // One test per row. On a hash mismatch the assertion message shows the
  // `RE.debugShow` output of the actual and the expected value.
  cases.forEach(([pattern, expected]) => {
    it(String(pattern), () => {
      const actual = AST.toExtRegex(parseRegExp(pattern))
      const diagnostics = RE.debugShow(actual) + '\n\n' + RE.debugShow(expected)
      assert.equal(actual.hash, expected.hash, diagnostics)
    })
  })

  // Registered with `todo: true`, so the runner reports it as a TODO item.
  it('fixme', { todo: true }, () => {
    const expected = RE.star(RE.string('a'))
    const actual = AST.toExtRegex(parseRegExp(/^(a(?!b))*$/))
    assert.equal(actual.hash, expected.hash)
  })
})
| 111 | + |
| 112 | +}) |
0 commit comments