From 46240978779f185072ca141c02299d0918dab1de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?=
Date: Wed, 13 Dec 2023 22:17:00 +0100
Subject: [PATCH] feat: base (non-named) capture

---
 src/__tests__/capture.test.tsx | 21 +++++++++++++++++++++
 src/capture.ts                 | 16 ++++++++++++++++
 src/encoder.ts                 |  5 +++++
 src/test-utils.ts              | 12 ++++++++++++
 src/types.ts                   | 13 ++++++++++++-
 5 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 src/__tests__/capture.test.tsx
 create mode 100644 src/capture.ts
 create mode 100644 src/test-utils.ts

diff --git a/src/__tests__/capture.test.tsx b/src/__tests__/capture.test.tsx
new file mode 100644
index 0000000..4169b72
--- /dev/null
+++ b/src/__tests__/capture.test.tsx
@@ -0,0 +1,21 @@
+import { buildPattern } from '..';
+import { capture } from '../capture';
+import { oneOrMore } from '../quantifiers/base';
+import { execRegex } from '../test-utils';
+
+test('"capture" base cases', () => {
+  expect(buildPattern(capture('a'))).toBe('(a)');
+  expect(buildPattern(capture('abc'))).toBe('(abc)');
+  expect(buildPattern(capture(oneOrMore('abc')))).toBe('((?:abc)+)');
+  expect(buildPattern(oneOrMore(capture('abc')))).toBe('(abc)+');
+});
+
+test('"capture" captures group', () => {
+  expect(execRegex('ab', [capture('b')])).toEqual(['b', 'b']);
+  expect(execRegex('ab', ['a', capture('b')])).toEqual(['ab', 'b']);
+  expect(execRegex('abc', ['a', capture('b'), capture('c')])).toEqual([
+    'abc',
+    'b',
+    'c',
+  ]);
+});
diff --git a/src/capture.ts b/src/capture.ts
new file mode 100644
index 0000000..d474629
--- /dev/null
+++ b/src/capture.ts
@@ -0,0 +1,16 @@
+import type { Capture, RegexElement } from './types';
+import { EncoderPriority, type EncoderNode } from './types-internal';
+
+export function capture(...children: RegexElement[]): Capture {
+  return {
+    type: 'capture',
+    children,
+  };
+}
+
+export function encodeCapture(node: EncoderNode): EncoderNode {
+  return {
+    pattern: `(${node.pattern})`,
+    priority: EncoderPriority.Atom,
+  };
+}
diff --git a/src/encoder.ts b/src/encoder.ts
index 5438656..6e31d42 100644
--- a/src/encoder.ts
+++ b/src/encoder.ts
@@ -10,6 +10,7 @@ import {
 } from './quantifiers/base';
 import { encodeRepeat } from './quantifiers/repeat';
 import { concatNodes, escapeText } from './utils';
+import { encodeCapture } from './capture';
 
 export function encodeSequence(elements: RegexElement[]): EncoderNode {
   return concatNodes(elements.map((c) => encodeElement(c)));
@@ -48,6 +49,10 @@ export function encodeElement(element: RegexElement): EncoderNode {
     return encodeZeroOrMore(encodeSequence(element.children));
   }
 
+  if (element.type === 'capture') {
+    return encodeCapture(encodeSequence(element.children));
+  }
+
   // @ts-expect-error User passed incorrect type
   throw new Error(`Unknown elements type ${element.type}`);
 }
diff --git a/src/test-utils.ts b/src/test-utils.ts
new file mode 100644
index 0000000..a15e65c
--- /dev/null
+++ b/src/test-utils.ts
@@ -0,0 +1,12 @@
+import { buildRegex } from '.';
+import type { RegexElement } from './types';
+
+export function execRegex(text: string, elements: RegexElement[]) {
+  const regex = buildRegex(...elements);
+  return [...regex.exec(text)!];
+}
+
+export function execRegexFull(text: string, elements: RegexElement[]) {
+  const regex = buildRegex(...elements);
+  return regex.exec(text)!;
+}
diff --git a/src/types.ts b/src/types.ts
index c65ccd3..5679086 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,4 +1,9 @@
-export type RegexElement = string | ChoiceOf | CharacterClass | Quantifier;
+export type RegexElement =
+  | string
+  | CharacterClass
+  | ChoiceOf
+  | Quantifier
+  | Capture;
 
 export type Quantifier = One | OneOrMore | Optionally | ZeroOrMore | Repeat;
 
@@ -41,3 +46,9 @@ export type Repeat = {
 };
 
 export type RepeatConfig = { count: number } | { min: number; max?: number };
+
+// Captures
+export type Capture = {
+  type: 'capture';
+  children: RegexElement[];
+};