From e20b357b2dd04735e69d96dcef04d858c57f4831 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 12 Dec 2023 13:53:03 +0100 Subject: [PATCH 1/6] refactor: optimize grouping --- .../__tests__/any-of.test.ts | 2 +- src/character-classes/compiler.ts | 20 ++++++++++--- src/compiler.ts | 18 ++++++----- src/components/__tests__/choiceOf.test.ts | 23 +++++++++----- src/components/choiceOf.ts | 16 ++++++---- src/quantifiers/__tests__/base.test.tsx | 8 +++++ src/quantifiers/__tests__/repeat.test.tsx | 9 +++++- src/quantifiers/base.ts | 30 +++++++++++++++---- src/quantifiers/repeat.ts | 17 +++++++---- src/types-internal.ts | 18 ++++++++++- src/utils.ts | 27 ++++++++++++++--- 11 files changed, 147 insertions(+), 41 deletions(-) diff --git a/src/character-classes/__tests__/any-of.test.ts b/src/character-classes/__tests__/any-of.test.ts index 0876929..188053b 100644 --- a/src/character-classes/__tests__/any-of.test.ts +++ b/src/character-classes/__tests__/any-of.test.ts @@ -11,7 +11,7 @@ test('"anyOf" base cases', () => { test('"anyOf" in context', () => { expect(p('x', anyOf('a'), 'x')).toBe('xax'); expect(p('x', anyOf('abc'), 'x')).toBe('x[abc]x'); - expect(p('x', oneOrMore(anyOf('abc')), 'x')).toBe('x(?:[abc])+x'); + expect(p('x', oneOrMore(anyOf('abc')), 'x')).toBe('x[abc]+x'); }); test('"anyOf" escapes special characters', () => { diff --git a/src/character-classes/compiler.ts b/src/character-classes/compiler.ts index 5ce0a46..ed9fc74 100644 --- a/src/character-classes/compiler.ts +++ b/src/character-classes/compiler.ts @@ -1,15 +1,27 @@ import type { CharacterClass } from '../types'; +import type { RegexNode } from '../types-internal'; -export function compileCharacterClass({ characters }: CharacterClass): string { +export function compileCharacterClass({ + characters, +}: CharacterClass): RegexNode { if (characters.length === 0) { - return ''; + return { + type: 'atom', + pattern: '', + }; } if (characters.length === 1) { - return characters[0]!; + return { + type: 'atom', + pattern: characters[0]!, + }; } - return `[${escapeHyphen(characters).join('')}]`; + return { + type: 'atom', + pattern: `[${escapeHyphen(characters).join('')}]`, + }; } // If passed characters includes hyphen (`-`) it need to be moved to diff --git a/src/compiler.ts b/src/compiler.ts index c3cbf82..76ffb7b 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -1,9 +1,10 @@ import type { RegexElement } from './types'; +import type { RegexNode } from './types-internal'; import { compileChoiceOf } from './components/choiceOf'; import { compileCharacterClass } from './character-classes/compiler'; import { baseQuantifiers, isBaseQuantifier } from './quantifiers/base'; import { compileRepeat } from './quantifiers/repeat'; -import { escapeText } from './utils'; +import { concatNodes, escapeText } from './utils'; /** * Generate RegExp object for elements. @@ -12,7 +13,7 @@ import { escapeText } from './utils'; * @returns */ export function buildRegex(...elements: RegexElement[]): RegExp { - const pattern = compileList(elements); + const pattern = compileList(elements).pattern; return new RegExp(pattern); } @@ -22,18 +23,21 @@ export function buildRegex(...elements: RegexElement[]): RegExp { * @returns */ export function buildPattern(...elements: RegexElement[]): string { - return compileList(elements); + return compileList(elements).pattern; } // Recursive compilation -function compileList(elements: RegexElement[]): string { - return elements.map((c) => compileSingle(c)).join(''); +function compileList(elements: RegexElement[]): RegexNode { + return concatNodes(elements.map((c) => compileSingle(c))); } -function compileSingle(element: RegexElement): string { +function compileSingle(element: RegexElement): RegexNode { if (typeof element === 'string') { - return escapeText(element); + return { + type: element.length === 1 ? 'atom' : 'sequence', + pattern: escapeText(element), + }; } if (element.type === 'characterClass') { diff --git a/src/components/__tests__/choiceOf.test.ts b/src/components/__tests__/choiceOf.test.ts index e5a765b..2e1c0ee 100644 --- a/src/components/__tests__/choiceOf.test.ts +++ b/src/components/__tests__/choiceOf.test.ts @@ -5,19 +5,28 @@ import { choiceOf } from '../choiceOf'; test('"choiceOf" using basic strings', () => { expect(buildPattern(choiceOf('a'))).toEqual('a'); - expect(buildPattern(choiceOf('a', 'b'))).toEqual('(?:a|b)'); - expect(buildPattern(choiceOf('a', 'b', 'c'))).toEqual('(?:a|b|c)'); + expect(buildPattern(choiceOf('a', 'b'))).toEqual('a|b'); + expect(buildPattern(choiceOf('a', 'b', 'c'))).toEqual('a|b|c'); + expect(buildPattern(choiceOf('aaa', 'bbb'))).toEqual('aaa|bbb'); +}); + +test('"choiceOf" used in sequence', () => { + expect(buildPattern('x', choiceOf('a'), 'x')).toEqual('xax'); + expect(buildPattern(choiceOf('a', 'b'), 'x')).toEqual('(?:a|b)x'); + expect(buildPattern('x', choiceOf('a', 'b'))).toEqual('x(?:a|b)'); + + expect(buildPattern(choiceOf('a', 'b', 'c'))).toEqual('a|b|c'); + expect(buildPattern('x', choiceOf('a', 'b', 'c'))).toEqual('x(?:a|b|c)'); + expect(buildPattern(choiceOf('a', 'b', 'c'), 'x')).toEqual('(?:a|b|c)x'); - expect(buildPattern(choiceOf('aaa', 'bbb'))).toEqual('(?:aaa|bbb)'); + expect(buildPattern(choiceOf('aaa', 'bbb'))).toEqual('aaa|bbb'); }); test('"choiceOf" using nested regex', () => { - expect(buildPattern(choiceOf(oneOrMore('a'), zeroOrMore('b')))).toBe( - '(?:a+|b*)' - ); + expect(buildPattern(choiceOf(oneOrMore('a'), zeroOrMore('b')))).toBe('a+|b*'); expect( buildPattern( choiceOf(repeat({ min: 1, max: 3 }, 'a'), repeat({ count: 5 }, 'bx')) ) - ).toBe('(?:a{1,3}|(?:bx){5})'); + ).toBe('a{1,3}|(?:bx){5}'); }); diff --git a/src/components/choiceOf.ts b/src/components/choiceOf.ts index 62d0cac..fd78ac6 100644 --- a/src/components/choiceOf.ts +++ b/src/components/choiceOf.ts @@ -1,6 +1,5 @@ import type { ChoiceOf, RegexElement } from '../types'; -import type { CompileSingle } from '../types-internal'; -import { wrapGroup } from '../utils'; +import type { CompileSingle, RegexNode } from '../types-internal'; export function choiceOf(...children: RegexElement[]): ChoiceOf { return { @@ -12,7 +11,14 @@ export function choiceOf(...children: RegexElement[]): ChoiceOf { export function compileChoiceOf( element: ChoiceOf, compileSingle: CompileSingle -): string { - const compiledChildren = element.children.map(compileSingle); - return wrapGroup(compiledChildren.join('|')); +): RegexNode { + const compileNodes = element.children.map(compileSingle); + if (compileNodes.length === 1) { + return compileNodes[0]!; + } + + return { + type: 'alternation', + pattern: compileNodes.map((n) => n.pattern).join('|'), + }; } diff --git a/src/quantifiers/__tests__/base.test.tsx b/src/quantifiers/__tests__/base.test.tsx index 05cd750..eec5e68 100644 --- a/src/quantifiers/__tests__/base.test.tsx +++ b/src/quantifiers/__tests__/base.test.tsx @@ -1,5 +1,6 @@ import { one, oneOrMore, optionally, zeroOrMore } from '../base'; import { buildPattern, buildRegex } from '../../compiler'; +import { digit } from '../../character-classes/base'; test('"oneOrMore" quantifier', () => { expect(buildPattern(oneOrMore('a'))).toEqual('a+'); @@ -44,3 +45,10 @@ test('zeroOrMore does not generate capture when grouping', () => { const groups = [...'aa'.match(regex)!]; expect(groups).toEqual(['aa']); }); + +test('base quantifiers optimize grouping for atoms', () => { + expect(buildPattern(one(digit))).toBe('\\d'); + expect(buildPattern(oneOrMore(digit))).toBe('\\d+'); + expect(buildPattern(optionally(digit))).toBe('\\d?'); + expect(buildPattern(zeroOrMore(digit))).toBe('\\d*'); +}); diff --git a/src/quantifiers/__tests__/repeat.test.tsx b/src/quantifiers/__tests__/repeat.test.tsx index d0015c7..46d09ed 100644 --- a/src/quantifiers/__tests__/repeat.test.tsx +++ b/src/quantifiers/__tests__/repeat.test.tsx @@ -1,6 +1,7 @@ -import { buildPattern } from '../../compiler'; import { zeroOrMore, oneOrMore } from '../base'; import { repeat } from '../repeat'; +import { digit } from '../../character-classes/base'; +import { buildPattern } from '../../compiler'; test('"repeat" quantifier', () => { expect(buildPattern('a', repeat({ min: 1, max: 5 }, 'b'))).toEqual('ab{1,5}'); @@ -14,3 +15,9 @@ test('"repeat" quantifier', () => { buildPattern(repeat({ count: 5 }, 'text', ' ', oneOrMore('d'))) ).toEqual('(?:text d+){5}'); }); + +test('"repeat"" optimizes grouping for atoms', () => { + expect(buildPattern(repeat({ count: 2 }, digit))).toBe('\\d{2}'); + expect(buildPattern(repeat({ min: 2 }, digit))).toBe('\\d{2,}'); + expect(buildPattern(repeat({ min: 1, max: 5 }, digit))).toBe('\\d{1,5}'); +}); diff --git a/src/quantifiers/base.ts b/src/quantifiers/base.ts index 7f12221..bacf2a3 100644 --- a/src/quantifiers/base.ts +++ b/src/quantifiers/base.ts @@ -6,7 +6,8 @@ import type { RegexElement, ZeroOrMore, } from '../types'; -import { wrapGroup } from '../utils'; +import { asAtom } from '../utils'; +import type { RegexNode } from '../types-internal'; export function one(...children: RegexElement[]): One { return { @@ -37,11 +38,28 @@ export function zeroOrMore(...children: RegexElement[]): ZeroOrMore { } export const baseQuantifiers = { - one: (compiledChildren) => compiledChildren, - oneOrMore: (compiledChildren) => `${wrapGroup(compiledChildren)}+`, - optionally: (compiledChildren) => `${wrapGroup(compiledChildren)}?`, - zeroOrMore: (compiledChildren) => `${wrapGroup(compiledChildren)}*`, -} as const satisfies Record string>; + one: (node) => { + return node; + }, + oneOrMore: (node) => { + return { + type: 'sequence', + pattern: `${asAtom(node)}+`, + }; + }, + optionally: (node) => { + return { + type: 'sequence', + pattern: `${asAtom(node)}?`, + }; + }, + zeroOrMore: (node) => { + return { + type: 'sequence', + pattern: `${asAtom(node)}*`, + }; + }, +} as const satisfies Record RegexNode>; export function isBaseQuantifier( element: Exclude diff --git a/src/quantifiers/repeat.ts b/src/quantifiers/repeat.ts index c6c6288..4b56aac 100644 --- a/src/quantifiers/repeat.ts +++ b/src/quantifiers/repeat.ts @@ -1,5 +1,6 @@ import type { RegexElement, Repeat, RepeatConfig } from '../types'; -import { wrapGroup } from '../utils'; +import type { RegexNode } from '../types-internal'; +import { asAtom } from '../utils'; export function repeat( config: RepeatConfig, @@ -14,11 +15,17 @@ export function repeat( export function compileRepeat( config: RepeatConfig, - compiledChildren: string -): string { + node: RegexNode +): RegexNode { if ('count' in config) { - return `${wrapGroup(compiledChildren)}{${config.count}}`; + return { + type: 'sequence', + pattern: `${asAtom(node)}{${config.count}}`, + }; } - return `${wrapGroup(compiledChildren)}{${config.min},${config?.max ?? ''}}`; + return { + type: 'sequence', + pattern: `${asAtom(node)}{${config.min},${config?.max ?? ''}}`, + }; } diff --git a/src/types-internal.ts b/src/types-internal.ts index ab54caf..0b65065 100644 --- a/src/types-internal.ts +++ b/src/types-internal.ts @@ -1,3 +1,19 @@ import type { RegexElement } from './types'; -export type CompileSingle = (element: RegexElement) => string; +/** + * Compiled regex pattern with information about its type (atom, sequence) + */ +export interface RegexNode { + pattern: string; + type: RegexNodeType; +} + +export type RegexNodeType = + /** Atom */ + | 'atom' + /** Sequence of atoms */ + | 'sequence' + /** Alternation */ + | 'alternation'; + +export type CompileSingle = (element: RegexElement) => RegexNode; diff --git a/src/utils.ts b/src/utils.ts index 6b48f1f..df623d5 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,11 +1,30 @@ +import type { RegexNode } from './types-internal'; + /** - * Wraps regex string in a non-capturing group if it is more than one character long. + * Returns atomic pattern for given node. * - * @param regex + * @param node * @returns */ -export function wrapGroup(regex: string): string { - return regex.length === 1 ? regex : `(?:${regex})`; +export function asAtom(node: RegexNode): string { + if (node.type === 'atom') { + return node.pattern; + } + + return `(?:${node.pattern})`; +} + +export function concatNodes(nodes: RegexNode[]): RegexNode { + if (nodes.length === 1) { + return nodes[0]!; + } + + return { + type: 'sequence', + pattern: nodes + .map((n) => (n.type === 'alternation' ? asAtom(n) : n.pattern)) + .join(''), + }; } // Source: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping From 291c920804f6faaf3318867268843c5cb15f9a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Tue, 12 Dec 2023 22:18:44 +0100 Subject: [PATCH 2/6] chore: introduce pseudo-enum --- src/character-classes/compiler.ts | 8 ++++---- src/compiler.ts | 7 +++++-- src/components/choiceOf.ts | 8 ++++++-- src/quantifiers/base.ts | 8 ++++---- src/quantifiers/repeat.ts | 6 +++--- src/types-internal.ts | 26 ++++++++++++++++++-------- src/utils.ts | 10 ++++++---- 7 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/character-classes/compiler.ts b/src/character-classes/compiler.ts index ed9fc74..74f4644 100644 --- a/src/character-classes/compiler.ts +++ b/src/character-classes/compiler.ts @@ -1,25 +1,25 @@ import type { CharacterClass } from '../types'; -import type { RegexNode } from '../types-internal'; +import { RegexNodePriority, type RegexNode } from '../types-internal'; export function compileCharacterClass({ characters, }: CharacterClass): RegexNode { if (characters.length === 0) { return { - type: 'atom', + priority: RegexNodePriority.Atom, pattern: '', }; } if (characters.length === 1) { return { - type: 'atom', + priority: RegexNodePriority.Atom, pattern: characters[0]!, }; } return { - type: 'atom', + priority: RegexNodePriority.Atom, pattern: `[${escapeHyphen(characters).join('')}]`, }; } diff --git a/src/compiler.ts b/src/compiler.ts index 76ffb7b..3c1dc3d 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -1,5 +1,5 @@ import type { RegexElement } from './types'; -import type { RegexNode } from './types-internal'; +import { RegexNodePriority, type RegexNode } from './types-internal'; import { compileChoiceOf } from './components/choiceOf'; import { compileCharacterClass } from './character-classes/compiler'; import { baseQuantifiers, isBaseQuantifier } from './quantifiers/base'; @@ -35,7 +35,10 @@ function compileList(elements: RegexElement[]): RegexNode { function compileSingle(element: RegexElement): RegexNode { if (typeof element === 'string') { return { - type: element.length === 1 ? 'atom' : 'sequence', + priority: + element.length === 1 + ? RegexNodePriority.Atom + : RegexNodePriority.Sequence, pattern: escapeText(element), }; } diff --git a/src/components/choiceOf.ts b/src/components/choiceOf.ts index fd78ac6..36428f3 100644 --- a/src/components/choiceOf.ts +++ b/src/components/choiceOf.ts @@ -1,5 +1,9 @@ import type { ChoiceOf, RegexElement } from '../types'; -import type { CompileSingle, RegexNode } from '../types-internal'; +import { + RegexNodePriority, + type CompileSingle, + type RegexNode, +} from '../types-internal'; export function choiceOf(...children: RegexElement[]): ChoiceOf { return { @@ -18,7 +22,7 @@ export function compileChoiceOf( } return { - type: 'alternation', + priority: RegexNodePriority.Alternation, pattern: compileNodes.map((n) => n.pattern).join('|'), }; } diff --git a/src/quantifiers/base.ts b/src/quantifiers/base.ts index bacf2a3..60a78ad 100644 --- a/src/quantifiers/base.ts +++ b/src/quantifiers/base.ts @@ -7,7 +7,7 @@ import type { ZeroOrMore, } from '../types'; import { asAtom } from '../utils'; -import type { RegexNode } from '../types-internal'; +import { RegexNodePriority, type RegexNode } from '../types-internal'; export function one(...children: RegexElement[]): One { return { @@ -43,19 +43,19 @@ export const baseQuantifiers = { }, oneOrMore: (node) => { return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: `${asAtom(node)}+`, }; }, optionally: (node) => { return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: `${asAtom(node)}?`, }; }, zeroOrMore: (node) => { return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: `${asAtom(node)}*`, }; }, diff --git a/src/quantifiers/repeat.ts b/src/quantifiers/repeat.ts index 4b56aac..d2f4d5e 100644 --- a/src/quantifiers/repeat.ts +++ b/src/quantifiers/repeat.ts @@ -1,5 +1,5 @@ import type { RegexElement, Repeat, RepeatConfig } from '../types'; -import type { RegexNode } from '../types-internal'; +import { RegexNodePriority, type RegexNode } from '../types-internal'; import { asAtom } from '../utils'; export function repeat( @@ -19,13 +19,13 @@ export function compileRepeat( ): RegexNode { if ('count' in config) { return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: `${asAtom(node)}{${config.count}}`, }; } return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: `${asAtom(node)}{${config.min},${config?.max ?? ''}}`, }; } diff --git a/src/types-internal.ts b/src/types-internal.ts index 0b65065..edfcb7b 100644 --- a/src/types-internal.ts +++ b/src/types-internal.ts @@ -5,15 +5,25 @@ import type { RegexElement } from './types'; */ export interface RegexNode { pattern: string; - type: RegexNodeType; + priority: RegexNodePriority; } -export type RegexNodeType = - /** Atom */ - | 'atom' - /** Sequence of atoms */ - | 'sequence' - /** Alternation */ - | 'alternation'; +/** + * Higher is more important. + */ +export const RegexNodePriority = { + // Atoms: single characters, character classes (`\d`, `[a-z]`), + // capturing and non-capturing groups (`()`) + Atom: 3, + + // Sequence of atoms, e.g., `abc` + Sequence: 2, + + // Alteration (OR, `|`) expression, e.g., `a|b` + Alternation: 1, +} as const; + +type ValueOf = T[keyof T]; +type RegexNodePriority = ValueOf; export type CompileSingle = (element: RegexElement) => RegexNode; diff --git a/src/utils.ts b/src/utils.ts index df623d5..f92921c 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,4 @@ -import type { RegexNode } from './types-internal'; +import { RegexNodePriority, type RegexNode } from './types-internal'; /** * Returns atomic pattern for given node. @@ -7,7 +7,7 @@ import type { RegexNode } from './types-internal'; * @returns */ export function asAtom(node: RegexNode): string { - if (node.type === 'atom') { + if (node.priority === RegexNodePriority.Atom) { return node.pattern; } @@ -20,9 +20,11 @@ export function concatNodes(nodes: RegexNode[]): RegexNode { } return { - type: 'sequence', + priority: RegexNodePriority.Sequence, pattern: nodes - .map((n) => (n.type === 'alternation' ? asAtom(n) : n.pattern)) + .map((n) => + n.priority < RegexNodePriority.Sequence ? asAtom(n) : n.pattern + ) .join(''), }; } From 0b45d8f16b16b6ae9bb7e801325da511d58518e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Tue, 12 Dec 2023 22:40:37 +0100 Subject: [PATCH 3/6] refactor: introduce zero element checks when appropriate --- src/__tests__/compiler.test.tsx | 6 +++++ .../__tests__/any-of.test.ts | 7 ++++- .../__tests__/compiler.test.ts | 12 +++++++++ src/character-classes/any-of.ts | 7 ++++- src/character-classes/base.ts | 10 +++---- src/character-classes/compiler.ts | 5 +--- src/compiler.ts | 26 ++++++++++++++----- src/components/__tests__/choiceOf.test.ts | 6 +++++ src/components/choiceOf.ts | 4 +++ src/quantifiers/__tests__/repeat.test.tsx | 6 +++++ src/quantifiers/repeat.ts | 4 +++ 11 files changed, 75 insertions(+), 18 deletions(-) create mode 100644 src/character-classes/__tests__/compiler.test.ts diff --git a/src/__tests__/compiler.test.tsx b/src/__tests__/compiler.test.tsx index 17308ce..921ccb3 100644 --- a/src/__tests__/compiler.test.tsx +++ b/src/__tests__/compiler.test.tsx @@ -56,3 +56,9 @@ test('buildRegex throws error on unknown element', () => { buildRegex({ type: 'unknown' }) ).toThrowErrorMatchingInlineSnapshot(`"Unknown elements type unknown"`); }); + +test('buildPattern throws on empty text', () => { + expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot( + `"\`compileText\`: received text should not be empty"` + ); +}); diff --git a/src/character-classes/__tests__/any-of.test.ts b/src/character-classes/__tests__/any-of.test.ts index 188053b..859e691 100644 --- a/src/character-classes/__tests__/any-of.test.ts +++ b/src/character-classes/__tests__/any-of.test.ts @@ -3,7 +3,6 @@ import { oneOrMore } from '../../quantifiers/base'; import { anyOf } from '../any-of'; test('"anyOf" base cases', () => { - expect(p(anyOf(''))).toBe(''); expect(p(anyOf('a'))).toBe('a'); expect(p(anyOf('abc'))).toBe('[abc]'); }); @@ -21,3 +20,9 @@ test('"anyOf" escapes special characters', () => { test('"anyOf" moves hyphen to the first position', () => { expect(p(anyOf('a-bc'))).toBe('[-abc]'); }); + +test('`anyOf` throws on empty text', () => { + expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot( + `"\`anyOf\` should received at least one character"` + ); +}); diff --git a/src/character-classes/__tests__/compiler.test.ts b/src/character-classes/__tests__/compiler.test.ts new file mode 100644 index 0000000..f7402b2 --- /dev/null +++ b/src/character-classes/__tests__/compiler.test.ts @@ -0,0 +1,12 @@ +import { compileCharacterClass } from '../compiler'; + +test('buildPattern throws on empty text', () => { + expect(() => + compileCharacterClass({ + type: 'characterClass', + characters: [], + }) + ).toThrowErrorMatchingInlineSnapshot( + `"Character class should contain at least one character"` + ); +}); diff --git a/src/character-classes/any-of.ts b/src/character-classes/any-of.ts index 6776b38..23c0458 100644 --- a/src/character-classes/any-of.ts +++ b/src/character-classes/any-of.ts @@ -2,8 +2,13 @@ import type { CharacterClass } from '../types'; import { escapeText } from '../utils'; export function anyOf(characters: string): CharacterClass { + const charactersArray = characters.split('').map(escapeText); + if (charactersArray.length === 0) { + throw new Error('`anyOf` should received at least one character'); + } + return { type: 'characterClass', - characters: characters.split('').map(escapeText), + characters: charactersArray, }; } diff --git a/src/character-classes/base.ts b/src/character-classes/base.ts index 6042cff..e620ac6 100644 --- a/src/character-classes/base.ts +++ b/src/character-classes/base.ts @@ -1,5 +1,10 @@ import type { CharacterClass } from '../types'; +export const any: CharacterClass = { + type: 'characterClass', + characters: ['.'], +}; + export const whitespace: CharacterClass = { type: 'characterClass', characters: ['\\s'], @@ -14,8 +19,3 @@ export const word: CharacterClass = { type: 'characterClass', characters: ['\\w'], }; - -export const any: CharacterClass = { - type: 'characterClass', - characters: ['.'], -}; diff --git a/src/character-classes/compiler.ts b/src/character-classes/compiler.ts index 74f4644..658e5db 100644 --- a/src/character-classes/compiler.ts +++ b/src/character-classes/compiler.ts @@ -5,10 +5,7 @@ export function compileCharacterClass({ characters, }: CharacterClass): RegexNode { if (characters.length === 0) { - return { - priority: RegexNodePriority.Atom, - pattern: '', - }; + throw new Error('Character class should contain at least one character'); } if (characters.length === 1) { diff --git a/src/compiler.ts b/src/compiler.ts index 3c1dc3d..546f753 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -34,13 +34,7 @@ function compileList(elements: RegexElement[]): RegexNode { function compileSingle(element: RegexElement): RegexNode { if (typeof element === 'string') { - return { - priority: - element.length === 1 - ? RegexNodePriority.Atom - : RegexNodePriority.Sequence, - pattern: escapeText(element), - }; + return compileText(element); } if (element.type === 'characterClass') { @@ -65,3 +59,21 @@ function compileSingle(element: RegexElement): RegexNode { // @ts-expect-error User passed incorrect type throw new Error(`Unknown elements type ${element.type}`); } + +function compileText(text: string): RegexNode { + if (text.length === 0) { + throw new Error('`compileText`: received text should not be empty'); + } + + if (text.length === 1) { + return { + priority: RegexNodePriority.Atom, + pattern: escapeText(text), + }; + } + + return { + priority: RegexNodePriority.Sequence, + pattern: escapeText(text), + }; +} diff --git a/src/components/__tests__/choiceOf.test.ts b/src/components/__tests__/choiceOf.test.ts index 2e1c0ee..c05faf7 100644 --- a/src/components/__tests__/choiceOf.test.ts +++ b/src/components/__tests__/choiceOf.test.ts @@ -30,3 +30,9 @@ test('"choiceOf" using nested regex', () => { ) ).toBe('a{1,3}|(?:bx){5}'); }); + +test('`anyOf` throws on empty options', () => { + expect(() => choiceOf()).toThrowErrorMatchingInlineSnapshot( + `"\`choiceOf\` should receive at least one option"` + ); +}); diff --git a/src/components/choiceOf.ts b/src/components/choiceOf.ts index 36428f3..7b4c2ac 100644 --- a/src/components/choiceOf.ts +++ b/src/components/choiceOf.ts @@ -6,6 +6,10 @@ import { } from '../types-internal'; export function choiceOf(...children: RegexElement[]): ChoiceOf { + if (children.length === 0) { + throw new Error('`choiceOf` should receive at least one option'); + } + return { type: 'choiceOf', children, diff --git a/src/quantifiers/__tests__/repeat.test.tsx b/src/quantifiers/__tests__/repeat.test.tsx index 46d09ed..52a056d 100644 --- a/src/quantifiers/__tests__/repeat.test.tsx +++ b/src/quantifiers/__tests__/repeat.test.tsx @@ -21,3 +21,9 @@ test('"repeat"" optimizes grouping for atoms', () => { expect(buildPattern(repeat({ min: 2 }, digit))).toBe('\\d{2,}'); expect(buildPattern(repeat({ min: 1, max: 5 }, digit))).toBe('\\d{1,5}'); }); + +test('`repeat` throws on no children', () => { + expect(() => repeat({ count: 1 })).toThrowErrorMatchingInlineSnapshot( + `"\`repeat\` should receive at least one element"` + ); +}); diff --git a/src/quantifiers/repeat.ts b/src/quantifiers/repeat.ts index d2f4d5e..6d7ab87 100644 --- a/src/quantifiers/repeat.ts +++ b/src/quantifiers/repeat.ts @@ -6,6 +6,10 @@ export function repeat( config: RepeatConfig, ...children: RegexElement[] ): Repeat { + if (children.length === 0) { + throw new Error('`repeat` should receive at least one element'); + } + return { type: 'repeat', children, From d822a9693a7bfe6384c167ac5e329d5b3b033ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 14 Dec 2023 21:56:30 +0100 Subject: [PATCH 4/6] refactor: naming cleanup --- .../{compiler.test.tsx => encoder.test.tsx} | 2 +- .../__tests__/any-of.test.ts | 16 ++-- src/character-classes/__tests__/base.test.ts | 4 +- .../{compiler.test.ts => encoder.test.ts} | 4 +- .../{compiler.ts => encoder.ts} | 14 ++-- src/compiler.ts | 79 ------------------- src/components/__tests__/choiceOf.test.ts | 2 +- src/components/choiceOf.ts | 14 ++-- src/encoder.ts | 57 +++++++++++++ src/index.ts | 25 +++++- src/index.tsx | 5 -- src/quantifiers/__tests__/base.test.tsx | 4 +- src/quantifiers/__tests__/repeat.test.tsx | 4 +- src/quantifiers/base.ts | 18 ++--- src/quantifiers/repeat.ts | 18 ++--- src/types-internal.ts | 10 +-- src/types.ts | 14 ++-- src/utils.ts | 12 +-- 18 files changed, 146 insertions(+), 156 deletions(-) rename src/__tests__/{compiler.test.tsx => encoder.test.tsx} (97%) rename src/character-classes/__tests__/{compiler.test.ts => encoder.test.ts} (74%) rename src/character-classes/{compiler.ts => encoder.ts} (68%) delete mode 100644 src/compiler.ts create mode 100644 src/encoder.ts delete mode 100644 src/index.tsx diff --git a/src/__tests__/compiler.test.tsx b/src/__tests__/encoder.test.tsx similarity index 97% rename from src/__tests__/compiler.test.tsx rename to src/__tests__/encoder.test.tsx index 921ccb3..caeac43 100644 --- a/src/__tests__/compiler.test.tsx +++ b/src/__tests__/encoder.test.tsx @@ -1,4 +1,4 @@ -import { buildPattern, buildRegex } from '../compiler'; +import { buildPattern, buildRegex } from '..'; import { one, oneOrMore, optionally, zeroOrMore } from '../quantifiers/base'; import { repeat } from '../quantifiers/repeat'; diff --git a/src/character-classes/__tests__/any-of.test.ts b/src/character-classes/__tests__/any-of.test.ts index 859e691..2b412ab 100644 --- a/src/character-classes/__tests__/any-of.test.ts +++ b/src/character-classes/__tests__/any-of.test.ts @@ -1,24 +1,24 @@ -import { buildPattern as p } from '../../compiler'; +import { buildPattern } from '../..'; import { oneOrMore } from '../../quantifiers/base'; import { anyOf } from '../any-of'; test('"anyOf" base cases', () => { - expect(p(anyOf('a'))).toBe('a'); - expect(p(anyOf('abc'))).toBe('[abc]'); + expect(buildPattern(anyOf('a'))).toBe('a'); + expect(buildPattern(anyOf('abc'))).toBe('[abc]'); }); test('"anyOf" in context', () => { - expect(p('x', anyOf('a'), 'x')).toBe('xax'); - expect(p('x', anyOf('abc'), 'x')).toBe('x[abc]x'); - expect(p('x', oneOrMore(anyOf('abc')), 'x')).toBe('x[abc]+x'); + expect(buildPattern('x', anyOf('a'), 'x')).toBe('xax'); + expect(buildPattern('x', anyOf('abc'), 'x')).toBe('x[abc]x'); + expect(buildPattern('x', oneOrMore(anyOf('abc')), 'x')).toBe('x[abc]+x'); }); test('"anyOf" escapes special characters', () => { - expect(p(anyOf('abc-+.'))).toBe('[-abc\\+\\.]'); + expect(buildPattern(anyOf('abc-+.'))).toBe('[-abc\\+\\.]'); }); test('"anyOf" moves hyphen to the first position', () => { - expect(p(anyOf('a-bc'))).toBe('[-abc]'); + expect(buildPattern(anyOf('a-bc'))).toBe('[-abc]'); }); test('`anyOf` throws on empty text', () => { diff --git a/src/character-classes/__tests__/base.test.ts b/src/character-classes/__tests__/base.test.ts index 0638662..37b5861 100644 --- a/src/character-classes/__tests__/base.test.ts +++ b/src/character-classes/__tests__/base.test.ts @@ -1,6 +1,6 @@ -import { any, digit, whitespace, word } from '../base'; -import { buildPattern } from '../../compiler'; +import { buildPattern } from '../..'; import { one } from '../../quantifiers/base'; +import { any, digit, whitespace, word } from '../base'; test('"whitespace" character class', () => { expect(buildPattern(whitespace)).toEqual(`\\s`); diff --git a/src/character-classes/__tests__/compiler.test.ts b/src/character-classes/__tests__/encoder.test.ts similarity index 74% rename from src/character-classes/__tests__/compiler.test.ts rename to src/character-classes/__tests__/encoder.test.ts index f7402b2..6b7ae77 100644 --- a/src/character-classes/__tests__/compiler.test.ts +++ b/src/character-classes/__tests__/encoder.test.ts @@ -1,8 +1,8 @@ -import { compileCharacterClass } from '../compiler'; +import { encodeCharacterClass } from '../encoder'; test('buildPattern throws on empty text', () => { expect(() => - compileCharacterClass({ + encodeCharacterClass({ type: 'characterClass', characters: [], }) diff --git a/src/character-classes/compiler.ts b/src/character-classes/encoder.ts similarity index 68% rename from src/character-classes/compiler.ts rename to src/character-classes/encoder.ts index 658e5db..62a3ff3 100644 --- a/src/character-classes/compiler.ts +++ b/src/character-classes/encoder.ts @@ -1,30 +1,30 @@ import type { CharacterClass } from '../types'; -import { RegexNodePriority, type RegexNode } from '../types-internal'; +import { EncoderPriority, type EncoderNode } from '../types-internal'; -export function compileCharacterClass({ +export function encodeCharacterClass({ characters, -}: CharacterClass): RegexNode { +}: CharacterClass): EncoderNode { if (characters.length === 0) { throw new Error('Character class should contain at least one character'); } if (characters.length === 1) { return { - priority: RegexNodePriority.Atom, + priority: EncoderPriority.Atom, pattern: characters[0]!, }; } return { - priority: RegexNodePriority.Atom, - pattern: `[${escapeHyphen(characters).join('')}]`, + priority: EncoderPriority.Atom, + pattern: `[${reorderHyphen(characters).join('')}]`, }; } // If passed characters includes hyphen (`-`) it need to be moved to // first (or last) place in order to treat it as hyphen character and not a range. // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types -function escapeHyphen(characters: string[]) { +function reorderHyphen(characters: string[]) { if (characters.includes('-')) { return ['-', ...characters.filter((c) => c !== '-')]; } diff --git a/src/compiler.ts b/src/compiler.ts deleted file mode 100644 index 546f753..0000000 --- a/src/compiler.ts +++ /dev/null @@ -1,79 +0,0 @@ -import type { RegexElement } from './types'; -import { RegexNodePriority, type RegexNode } from './types-internal'; -import { compileChoiceOf } from './components/choiceOf'; -import { compileCharacterClass } from './character-classes/compiler'; -import { baseQuantifiers, isBaseQuantifier } from './quantifiers/base'; -import { compileRepeat } from './quantifiers/repeat'; -import { concatNodes, escapeText } from './utils'; - -/** - * Generate RegExp object for elements. - * - * @param elements - * @returns - */ -export function buildRegex(...elements: RegexElement[]): RegExp { - const pattern = compileList(elements).pattern; - return new RegExp(pattern); -} - -/** - * Generate regex pattern for elements. - * @param elements - * @returns - */ -export function buildPattern(...elements: RegexElement[]): string { - return compileList(elements).pattern; -} - -// Recursive compilation - -function compileList(elements: RegexElement[]): RegexNode { - return concatNodes(elements.map((c) => compileSingle(c))); -} - -function compileSingle(element: RegexElement): RegexNode { - if (typeof element === 'string') { - return compileText(element); - } - - if (element.type === 'characterClass') { - return compileCharacterClass(element); - } - - if (element.type === 'choiceOf') { - return compileChoiceOf(element, compileSingle); - } - - if (element.type === 'repeat') { - const compiledChildren = compileList(element.children); - return compileRepeat(element.config, compiledChildren); - } - - if (isBaseQuantifier(element)) { - const compiledChildren = compileList(element.children); - const compiler = baseQuantifiers[element.type]; - return compiler(compiledChildren); - } - - // @ts-expect-error User passed incorrect type - throw new Error(`Unknown elements type ${element.type}`); -} - -function compileText(text: string): RegexNode { - if (text.length === 0) { - throw new Error('`compileText`: received text should not be empty'); - } - - if (text.length === 1) { - return { - priority: RegexNodePriority.Atom, - pattern: escapeText(text), - }; - } - - return { - priority: RegexNodePriority.Sequence, - pattern: escapeText(text), - }; -} diff --git a/src/components/__tests__/choiceOf.test.ts b/src/components/__tests__/choiceOf.test.ts index c05faf7..2ff500b 100644 --- a/src/components/__tests__/choiceOf.test.ts +++ b/src/components/__tests__/choiceOf.test.ts @@ -1,4 +1,4 @@ -import { buildPattern } from '../../compiler'; +import { buildPattern } from '../..'; import { oneOrMore, zeroOrMore } from '../../quantifiers/base'; import { repeat } from '../../quantifiers/repeat'; import { choiceOf } from '../choiceOf'; diff --git a/src/components/choiceOf.ts b/src/components/choiceOf.ts index 7b4c2ac..b61c54e 100644 --- a/src/components/choiceOf.ts +++ b/src/components/choiceOf.ts @@ -1,8 +1,8 @@ import type { ChoiceOf, RegexElement } from '../types'; import { - RegexNodePriority, - type CompileSingle, - type RegexNode, + EncoderPriority, + type EncodeElement, + type EncoderNode, } from '../types-internal'; export function choiceOf(...children: RegexElement[]): ChoiceOf { @@ -16,17 +16,17 @@ export function choiceOf(...children: RegexElement[]): ChoiceOf { }; } -export function compileChoiceOf( +export function encodeChoiceOf( element: ChoiceOf, - compileSingle: CompileSingle -): RegexNode { + compileSingle: EncodeElement +): EncoderNode { const compileNodes = element.children.map(compileSingle); if (compileNodes.length === 1) { return compileNodes[0]!; } return { - priority: RegexNodePriority.Alternation, + priority: EncoderPriority.Alternation, pattern: compileNodes.map((n) => n.pattern).join('|'), }; } diff --git a/src/encoder.ts b/src/encoder.ts new file mode 100644 index 0000000..2a9d26d --- /dev/null +++ b/src/encoder.ts @@ -0,0 +1,57 @@ +import type { RegexElement } from './types'; +import { EncoderPriority, type EncoderNode } from './types-internal'; +import { encodeChoiceOf } from './components/choiceOf'; +import { encodeCharacterClass } from './character-classes/encoder'; +import { baseQuantifiers, isBaseQuantifier } from './quantifiers/base'; +import { encodeRepeat } from './quantifiers/repeat'; +import { concatNodes, escapeText } from './utils'; + +export function encodeSequence(elements: RegexElement[]): EncoderNode { + return concatNodes(elements.map((c) => encodeElement(c))); +} + +export function encodeElement(element: RegexElement): EncoderNode { + if (typeof element === 'string') { + return encodeText(element); + } + + if (element.type === 'characterClass') { + return encodeCharacterClass(element); + } + + if (element.type === 'choiceOf') { + return encodeChoiceOf(element, encodeElement); + } + + if (element.type === 'repeat') { + const compiledChildren = encodeSequence(element.children); + return encodeRepeat(element.config, compiledChildren); + } + + if (isBaseQuantifier(element)) { + const compiledChildren = encodeSequence(element.children); + const encoder = baseQuantifiers[element.type]; + return encoder(compiledChildren); + } + + // @ts-expect-error User passed incorrect type + throw new Error(`Unknown elements type ${element.type}`); +} + +function encodeText(text: string): EncoderNode { + if (text.length === 0) { + throw new Error('`compileText`: received text should not be empty'); + } + + if (text.length === 1) { + return { + priority: EncoderPriority.Atom, + pattern: escapeText(text), + }; + } + + return { + priority: EncoderPriority.Sequence, + pattern: escapeText(text), + }; +} diff --git a/src/index.ts b/src/index.ts index 9e536e0..2d9c1ad 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,9 +1,30 @@ -export type * from './types'; +import type { RegexElement } from './types'; +import { encodeSequence } from './encoder'; -export { buildRegex, buildPattern } from './compiler'; +export type * from './types'; export { any, digit, whitespace, word } from './character-classes/base'; export { anyOf } from './character-classes/any-of'; export { one, oneOrMore, optionally, zeroOrMore } from './quantifiers/base'; export { repeat } from './quantifiers/repeat'; export { choiceOf } from './components/choiceOf'; + +/** + * Generate RegExp object for elements. + * + * @param elements + * @returns + */ +export function buildRegex(...elements: RegexElement[]): RegExp { + const pattern = encodeSequence(elements).pattern; + return new RegExp(pattern); +} + +/** + * Generate regex pattern for elements. + * @param elements + * @returns + */ +export function buildPattern(...elements: RegexElement[]): string { + return encodeSequence(elements).pattern; +} diff --git a/src/index.tsx b/src/index.tsx deleted file mode 100644 index abc20f1..0000000 --- a/src/index.tsx +++ /dev/null @@ -1,5 +0,0 @@ -export type * from './types'; - -export { whitespace } from './character-classes/base'; -export { buildRegex, buildPattern } from './compiler'; -export { oneOrMore, optionally } from './quantifiers/base'; diff --git a/src/quantifiers/__tests__/base.test.tsx b/src/quantifiers/__tests__/base.test.tsx index eec5e68..de1d3a3 100644 --- a/src/quantifiers/__tests__/base.test.tsx +++ b/src/quantifiers/__tests__/base.test.tsx @@ -1,6 +1,6 @@ -import { one, oneOrMore, optionally, zeroOrMore } from '../base'; -import { buildPattern, buildRegex } from '../../compiler'; +import { buildPattern, buildRegex } from '../..'; import { digit } from '../../character-classes/base'; +import { one, oneOrMore, optionally, zeroOrMore } from '../base'; test('"oneOrMore" quantifier', () => { expect(buildPattern(oneOrMore('a'))).toEqual('a+'); diff --git a/src/quantifiers/__tests__/repeat.test.tsx b/src/quantifiers/__tests__/repeat.test.tsx index 52a056d..9340ab5 100644 --- a/src/quantifiers/__tests__/repeat.test.tsx +++ b/src/quantifiers/__tests__/repeat.test.tsx @@ -1,7 +1,7 @@ +import { buildPattern } from '../..'; +import { digit } from '../../character-classes/base'; import { zeroOrMore, oneOrMore } from '../base'; import { repeat } from '../repeat'; -import { digit } from '../../character-classes/base'; -import { buildPattern } from '../../compiler'; test('"repeat" quantifier', () => { expect(buildPattern('a', repeat({ min: 1, max: 5 }, 'b'))).toEqual('ab{1,5}'); diff --git a/src/quantifiers/base.ts b/src/quantifiers/base.ts index 60a78ad..ef0c5fd 100644 --- a/src/quantifiers/base.ts +++ b/src/quantifiers/base.ts @@ -6,8 +6,8 @@ import type { RegexElement, ZeroOrMore, } from '../types'; -import { asAtom } from '../utils'; -import { RegexNodePriority, type RegexNode } from '../types-internal'; +import { toAtom } from '../utils'; +import { EncoderPriority, type EncoderNode } from '../types-internal'; export function one(...children: RegexElement[]): One { return { @@ -43,23 +43,23 @@ export const baseQuantifiers = { }, oneOrMore: (node) => { return { - priority: RegexNodePriority.Sequence, - pattern: `${asAtom(node)}+`, + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}+`, }; }, optionally: (node) => { return { - priority: RegexNodePriority.Sequence, - pattern: `${asAtom(node)}?`, + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}?`, }; }, zeroOrMore: (node) => { return { - priority: RegexNodePriority.Sequence, - pattern: `${asAtom(node)}*`, + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}*`, }; }, -} as const satisfies Record RegexNode>; +} as const satisfies Record EncoderNode>; export function isBaseQuantifier( element: Exclude diff --git a/src/quantifiers/repeat.ts b/src/quantifiers/repeat.ts index 6d7ab87..76a17de 100644 --- a/src/quantifiers/repeat.ts +++ b/src/quantifiers/repeat.ts @@ -1,6 +1,6 @@ import type { RegexElement, Repeat, RepeatConfig } from '../types'; -import { RegexNodePriority, type RegexNode } from '../types-internal'; -import { asAtom } from '../utils'; +import { EncoderPriority, type EncoderNode } from '../types-internal'; +import { toAtom } from '../utils'; export function repeat( config: RepeatConfig, @@ -17,19 +17,19 @@ export function repeat( }; } -export function compileRepeat( +export function encodeRepeat( config: RepeatConfig, - node: RegexNode -): RegexNode { + node: EncoderNode +): EncoderNode { if ('count' in config) { return { - priority: RegexNodePriority.Sequence, - pattern: `${asAtom(node)}{${config.count}}`, + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}{${config.count}}`, }; } return { - priority: RegexNodePriority.Sequence, - pattern: `${asAtom(node)}{${config.min},${config?.max ?? ''}}`, + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}{${config.min},${config?.max ?? ''}}`, }; } diff --git a/src/types-internal.ts b/src/types-internal.ts index edfcb7b..2065e4e 100644 --- a/src/types-internal.ts +++ b/src/types-internal.ts @@ -3,15 +3,15 @@ import type { RegexElement } from './types'; /** * Compiled regex pattern with information about its type (atom, sequence) */ -export interface RegexNode { +export interface EncoderNode { pattern: string; - priority: RegexNodePriority; + priority: EncoderPriority; } /** * Higher is more important. */ -export const RegexNodePriority = { +export const EncoderPriority = { // Atoms: single characters, character classes (`\d`, `[a-z]`), // capturing and non-capturing groups (`()`) Atom: 3, @@ -24,6 +24,6 @@ export const RegexNodePriority = { } as const; type ValueOf = T[keyof T]; -type RegexNodePriority = ValueOf; +type EncoderPriority = ValueOf; -export type CompileSingle = (element: RegexElement) => RegexNode; +export type EncodeElement = (element: RegexElement) => EncoderNode; diff --git a/src/types.ts b/src/types.ts index 32b4ed6..c65ccd3 100644 --- a/src/types.ts +++ b/src/types.ts @@ -29,11 +29,10 @@ export type Optionally = { children: RegexElement[]; }; -export type RepeatConfig = - | { min: number; max?: number } - | { - count: number; - }; +export type ZeroOrMore = { + type: 'zeroOrMore'; + children: RegexElement[]; +}; export type Repeat = { type: 'repeat'; @@ -41,7 +40,4 @@ export type Repeat = { config: RepeatConfig; }; -export type ZeroOrMore = { - type: 'zeroOrMore'; - children: RegexElement[]; -}; +export type RepeatConfig = { count: number } | { min: number; max?: number }; diff --git a/src/utils.ts b/src/utils.ts index f92921c..4383111 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,4 @@ -import { RegexNodePriority, type RegexNode } from './types-internal'; +import { EncoderPriority, type EncoderNode } from './types-internal'; /** * Returns atomic pattern for given node. @@ -6,24 +6,24 @@ import { RegexNodePriority, type RegexNode } from './types-internal'; * @param node * @returns */ -export function asAtom(node: RegexNode): string { - if (node.priority === RegexNodePriority.Atom) { +export function toAtom(node: EncoderNode): string { + if (node.priority === EncoderPriority.Atom) { return node.pattern; } return `(?:${node.pattern})`; } -export function concatNodes(nodes: RegexNode[]): RegexNode { +export function concatNodes(nodes: EncoderNode[]): EncoderNode { if (nodes.length === 1) { return nodes[0]!; } return { - priority: RegexNodePriority.Sequence, + priority: EncoderPriority.Sequence, pattern: nodes .map((n) => - n.priority < RegexNodePriority.Sequence ? asAtom(n) : n.pattern + n.priority < EncoderPriority.Sequence ? toAtom(n) : n.pattern ) .join(''), }; From f251cd79cc632114c8c1cee8a15f29fa89c44ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 14 Dec 2023 22:07:36 +0100 Subject: [PATCH 5/6] refactor: simplify base quantifier encoders --- src/encoder.ts | 28 ++++++++++++++++------ src/quantifiers/base.ts | 52 ++++++++++++++++++----------------------- 2 files changed, 44 insertions(+), 36 deletions(-) diff --git a/src/encoder.ts b/src/encoder.ts index 2a9d26d..7c6c7a6 100644 --- a/src/encoder.ts +++ b/src/encoder.ts @@ -2,7 +2,12 @@ import type { RegexElement } from './types'; import { EncoderPriority, type EncoderNode } from './types-internal'; import { encodeChoiceOf } from './components/choiceOf'; import { encodeCharacterClass } from './character-classes/encoder'; -import { baseQuantifiers, isBaseQuantifier } from './quantifiers/base'; +import { + encodeOne, + encodeOneOrMore, + encodeOptionally, + encodeZeroOrMore, +} from './quantifiers/base'; import { encodeRepeat } from './quantifiers/repeat'; import { concatNodes, escapeText } from './utils'; @@ -24,14 +29,23 @@ export function encodeElement(element: RegexElement): EncoderNode { } if (element.type === 'repeat') { - const compiledChildren = encodeSequence(element.children); - return encodeRepeat(element.config, compiledChildren); + return encodeRepeat(element.config, encodeSequence(element.children)); } - if (isBaseQuantifier(element)) { - const compiledChildren = encodeSequence(element.children); - const encoder = baseQuantifiers[element.type]; - return encoder(compiledChildren); + if (element.type === 'one') { + return encodeOne(encodeSequence(element.children)); + } + + if (element.type === 'oneOrMore') { + return encodeOneOrMore(encodeSequence(element.children)); + } + + if (element.type === 'optionally') { + return encodeOptionally(encodeSequence(element.children)); + } + + if (element.type === 'zeroOrMore') { + return encodeZeroOrMore(encodeSequence(element.children)); } // @ts-expect-error User passed incorrect type diff --git a/src/quantifiers/base.ts b/src/quantifiers/base.ts index ef0c5fd..7dda93a 100644 --- a/src/quantifiers/base.ts +++ b/src/quantifiers/base.ts @@ -2,12 +2,11 @@ import type { One, OneOrMore, Optionally, - Quantifier, RegexElement, ZeroOrMore, } from '../types'; -import { toAtom } from '../utils'; import { EncoderPriority, type EncoderNode } from '../types-internal'; +import { toAtom } from '../utils'; export function one(...children: RegexElement[]): One { return { @@ -37,32 +36,27 @@ export function zeroOrMore(...children: RegexElement[]): ZeroOrMore { }; } -export const baseQuantifiers = { - one: (node) => { - return node; - }, - oneOrMore: (node) => { - return { - priority: EncoderPriority.Sequence, - pattern: `${toAtom(node)}+`, - }; - }, - optionally: (node) => { - return { - priority: EncoderPriority.Sequence, - pattern: `${toAtom(node)}?`, - }; - }, - zeroOrMore: (node) => { - return { - priority: EncoderPriority.Sequence, - pattern: `${toAtom(node)}*`, - }; - }, -} as const satisfies Record EncoderNode>; +export function encodeOne(node: EncoderNode) { + return node; +} + +export function encodeOneOrMore(node: EncoderNode): EncoderNode { + return { + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}+`, + }; +} -export function isBaseQuantifier( - element: Exclude -): element is Quantifier { - return element.type in baseQuantifiers; +export function encodeOptionally(node: EncoderNode): EncoderNode { + return { + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}?`, + }; +} + +export function encodeZeroOrMore(node: EncoderNode): EncoderNode { + return { + priority: EncoderPriority.Sequence, + pattern: `${toAtom(node)}*`, + }; } From f50fdb882b485f408898bf3886d5ab896eb64c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 14 Dec 2023 22:12:35 +0100 Subject: [PATCH 6/6] refactor: update stale names --- src/__tests__/encoder.test.tsx | 2 +- src/components/choiceOf.ts | 10 +++++----- src/encoder.ts | 2 +- src/types-internal.ts | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/__tests__/encoder.test.tsx b/src/__tests__/encoder.test.tsx index caeac43..56fc975 100644 --- a/src/__tests__/encoder.test.tsx +++ b/src/__tests__/encoder.test.tsx @@ -59,6 +59,6 @@ test('buildRegex throws error on unknown element', () => { test('buildPattern throws on empty text', () => { expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot( - `"\`compileText\`: received text should not be empty"` + `"\`encodeText\`: received text should not be empty"` ); }); diff --git a/src/components/choiceOf.ts b/src/components/choiceOf.ts index b61c54e..0d1afa8 100644 --- a/src/components/choiceOf.ts +++ b/src/components/choiceOf.ts @@ -18,15 +18,15 @@ export function choiceOf(...children: RegexElement[]): ChoiceOf { export function encodeChoiceOf( element: ChoiceOf, - compileSingle: EncodeElement + encodeElement: EncodeElement ): EncoderNode { - const compileNodes = element.children.map(compileSingle); - if (compileNodes.length === 1) { - return compileNodes[0]!; + const encodedNodes = element.children.map(encodeElement); + if (encodedNodes.length === 1) { + return encodedNodes[0]!; } return { priority: EncoderPriority.Alternation, - pattern: compileNodes.map((n) => n.pattern).join('|'), + pattern: encodedNodes.map((n) => n.pattern).join('|'), }; } diff --git a/src/encoder.ts b/src/encoder.ts index 7c6c7a6..5438656 100644 --- a/src/encoder.ts +++ b/src/encoder.ts @@ -54,7 +54,7 @@ export function encodeElement(element: RegexElement): EncoderNode { function encodeText(text: string): EncoderNode { if (text.length === 0) { - throw new Error('`compileText`: received text should not be empty'); + throw new Error('`encodeText`: received text should not be empty'); } if (text.length === 1) { diff --git a/src/types-internal.ts b/src/types-internal.ts index 2065e4e..6496e82 100644 --- a/src/types-internal.ts +++ b/src/types-internal.ts @@ -1,7 +1,7 @@ import type { RegexElement } from './types'; /** - * Compiled regex pattern with information about its type (atom, sequence) + * Encoded regex pattern with information about its type (atom, sequence) */ export interface EncoderNode { pattern: string;