Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: named capture groups and reference (alternative approach) #78

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions jest-setup.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import './test-utils/to-equal-regex';
import './test-utils/to-match-groups';
import './test-utils/to-match-all-groups';
import './test-utils/to-match-named-groups';
import './test-utils/to-match-all-named-groups';
import './test-utils/to-match-string';
30 changes: 30 additions & 0 deletions src/__tests__/example-html-tags.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { any, buildRegExp, capture, oneOrMore, zeroOrMore } from '..';

test('example: html tag matching', () => {
  // Named group for the tag name; referenced again in the closing tag.
  const nameGroup = capture(oneOrMore(/[a-z0-9]/), { name: 'tag' });

  // Named group for everything between the opening and closing tags (non-greedy).
  const lazyAnything = zeroOrMore(any, { greedy: false });
  const contentGroup = capture(lazyAnything, { name: 'content' });

  const flags = { ignoreCase: true, global: true };
  const tagMatcher = buildRegExp(
    ['<', nameGroup, '>', contentGroup, '</', nameGroup.ref(), '>'],
    flags,
  );

  // Single tag.
  expect(tagMatcher).toMatchAllNamedGroups('<a>abc</a>', [{ tag: 'a', content: 'abc' }]);

  // Nested tags: the outer tag wins and the inner markup is part of its content.
  expect(tagMatcher).toMatchAllNamedGroups('<a><b>abc</b></a>', [
    { tag: 'a', content: '<b>abc</b>' },
  ]);

  // Sibling tags: one match per tag.
  expect(tagMatcher).toMatchAllNamedGroups('<a>abc1</a><b>abc2</b>', [
    { tag: 'a', content: 'abc1' },
    { tag: 'b', content: 'abc2' },
  ]);

  // Mismatched closing tag must be rejected by the backreference.
  expect(tagMatcher).not.toMatchString('<a>abc</b>');

  expect(tagMatcher).toEqualRegex('<(?<tag>[a-z0-9]+)>(?<content>.*?)<\\/\\k<tag>>');
});
106 changes: 105 additions & 1 deletion src/constructs/__tests__/capture.test.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
import { capture, oneOrMore } from '../..';
import {
any,
anyOf,
buildRegExp,
capture,
digit,
negated,
oneOrMore,
word,
wordBoundary,
} from '../..';

test('`capture` pattern', () => {
expect(capture('a')).toEqualRegex(/(a)/);
Expand All @@ -12,3 +22,97 @@ test('`capture` matching', () => {
expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']);
expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']);
});

test('named `capture` pattern', () => {
  // Each entry pairs a construct with the regex source it is expected to encode to.
  const cases = [
    [capture('a', { name: 'xyz' }), '(?<xyz>a)'],
    [capture('abc', { name: 'xyz' }), '(?<xyz>abc)'],
    [capture(oneOrMore('abc'), { name: 'xyz' }), '(?<xyz>(?:abc)+)'],
    [oneOrMore(capture('abc', { name: 'xyz' })), '(?<xyz>abc)+'],
  ] as const;

  for (const [construct, expectedPattern] of cases) {
    expect(construct).toEqualRegex(expectedPattern);
  }
});

test('named `capture` matching', () => {
  // A lone named group: visible both positionally and by name.
  const loneGroup = capture('b', { name: 'x1' });
  expect(loneGroup).toMatchGroups('ab', ['b', 'b']);
  expect(loneGroup).toMatchNamedGroups('ab', { x1: 'b' });

  // A named group preceded by a literal.
  const literalThenGroup = ['a', capture('b', { name: 'x1' })];
  expect(literalThenGroup).toMatchGroups('ab', ['ab', 'b']);
  expect(literalThenGroup).toMatchNamedGroups('ab', { x1: 'b' });

  // Unnamed and named groups mixed: all appear positionally, only named ones by name.
  const mixedGroups = [
    capture('a'),
    capture('b', { name: 'x1' }),
    capture('c', { name: 'x2' }),
  ];
  expect(mixedGroups).toMatchGroups('abc', ['abc', 'a', 'b', 'c']);
  expect(mixedGroups).toMatchNamedGroups('abc', { x1: 'b', x2: 'c' });
});

test('`ref` function', () => {
  // Backreference directly after the group, separated by a space.
  const firstGroup = capture(any, { name: 'ref0' });
  const firstSequence = [firstGroup, ' ', firstGroup.ref()];
  expect(firstSequence).toEqualRegex('(?<ref0>.) \\k<ref0>');

  // Same shape, surrounded by literals, with a different group name.
  const secondGroup = capture(any, { name: 'r123' });
  const secondSequence = ['xx', secondGroup, ' ', secondGroup.ref(), 'xx'];
  expect(secondSequence).toEqualRegex('xx(?<r123>.) \\k<r123>xx');
});

test('`reference` matching basic case', () => {
  // Builds "group followed by a backreference to itself", i.e. a doubled character.
  const doubled = (group: ReturnType<typeof capture>) => [group, group.ref()];

  const wordGroup = capture(word, { name: 'word' });
  const digitGroup = capture(digit, { name: 'digit' });
  const anyGroup = capture(any, { name: 'any' });

  // Repeated characters satisfy the backreference.
  expect(doubled(wordGroup)).toMatchString('aa');
  expect(doubled(digitGroup)).toMatchString('11');

  // Two different characters do not.
  expect(doubled(anyGroup)).not.toMatchString('ab');

  // Neither does a digit paired with a non-digit, in either order.
  expect(doubled(digitGroup)).not.toMatchString('1a');
  expect(doubled(digitGroup)).not.toMatchString('a1');
});

test('`reference` matching HTML attributes', () => {
  // The value may be wrapped in either quote style; the closing quote must
  // repeat whichever one opened it, which is enforced through the backreference.
  const quote = anyOf('"\'');
  const quoteCapture = capture(quote, { name: 'quote' });
  const nameCapture = capture(oneOrMore(word), { name: 'name' });
  const valueCapture = capture(oneOrMore(negated(quote)), { name: 'value' });

  const htmlAttributeRegex = buildRegExp([
    wordBoundary,
    nameCapture,
    '=',
    quoteCapture,
    valueCapture,
    quoteCapture.ref(),
  ]);

  // Inputs that must match, paired with the named groups they should yield.
  const accepted: Array<[string, Record<string, string>]> = [
    ['a="b"', { name: 'a', quote: '"', value: 'b' }],
    ['aa="bbb"', { name: 'aa', quote: '"', value: 'bbb' }],
    [`aa='bbb'`, { name: 'aa', quote: `'`, value: 'bbb' }],
    ['<input type="number" />', { quote: '"', name: 'type', value: 'number' }],
    [`<input type='number' />`, { quote: "'", name: 'type', value: 'number' }],
  ];
  for (const [inputText, expectedGroups] of accepted) {
    expect(htmlAttributeRegex).toMatchNamedGroups(inputText, expectedGroups);
  }

  // Inputs with mismatched opening/closing quotes must not match at all.
  const rejected = [
    `aa="bbb'`,
    `aa='bbb"`,
    `<input type='number" />`,
    `<input type="number' />`,
  ];
  for (const inputText of rejected) {
    expect(htmlAttributeRegex).not.toMatchString(inputText);
  }
});
57 changes: 56 additions & 1 deletion src/constructs/capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,74 @@ import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
/**
 * Regex construct describing a capturing group, as returned by `capture()`.
 */
export interface Capture extends RegexConstruct {
type: 'capture';
/** Elements placed inside the group. */
children: RegexElement[];
/** Optional settings; when `name` is set the group is encoded as a named group. */
options?: CaptureOptions;

/**
 * Creates a backreference to this capturing group.
 * A backreference matches the same text that the group previously captured.
 *
 * Note: requires the `name` option to have been passed to `capture()`;
 * the implementation in this file throws an `Error` when no name is set.
 * It reads the group through `this`, so call it as a method (`group.ref()`).
 */
ref: () => Backreference;
}

/** Options accepted by `capture()`. */
export type CaptureOptions = {
/**
 * Name given to the capturing group.
 * A named group is encoded as `(?<name>...)` and can be referenced with `ref()`.
 */
name: string;
};

/**
 * Regex construct describing a backreference to a named capturing group.
 * It is encoded as `\k<name>`.
 */
export interface Backreference extends RegexConstruct {
type: 'backreference';
/** Name of the capturing group being referenced. */
name: string;
}

export function capture(sequence: RegexSequence): Capture {
/**
 * Wraps `sequence` in a capturing group, making the text it matches available:
 * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`)
 * - in the regex itself, through a backreference created with `ref()`
 *
 * @param sequence - Elements to place inside the group.
 * @param options - Optional settings; pass `name` to create a named group
 *   (a name is required before `ref()` can be called on the result).
 * @returns The `capture` construct.
 */
export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture {
  const children = ensureArray(sequence);

  const group: Capture = {
    type: 'capture',
    children,
    options,
    ref: generateRef,
    encode: encodeCapture,
  };

  return group;
}

/**
 * Implementation of `Capture.ref`: builds a backreference to the group it is called on.
 *
 * @returns A `backreference` construct carrying the group's name.
 * @throws Error when the group has no (non-empty) `name` option.
 */
function generateRef(this: Capture): Backreference {
  const groupName = this.options?.name;

  // Only named groups can be referenced; an empty name counts as missing.
  if (groupName) {
    return {
      type: 'backreference',
      name: groupName,
      encode: encodeBackreference,
    };
  }

  throw new Error('Capture group "name" is required when calling "ref()".');
}

/**
 * Encodes a capture construct as a regex group.
 *
 * Produces `(?<name>...)` when a non-empty `name` option is set and a plain
 * `(...)` group otherwise. The result always has `atom` precedence.
 */
function encodeCapture(this: Capture): EncodeResult {
  const groupName = this.options?.name;
  const inner = encodeSequence(this.children).pattern;

  // The two forms differ only in the optional `?<name>` prefix.
  const prefix = groupName ? `?<${groupName}>` : '';

  return {
    precedence: 'atom',
    pattern: `(${prefix}${inner})`,
  };
}

/**
 * Encodes a backreference construct as a named backreference, `\k<name>`,
 * with `atom` precedence.
 */
function encodeBackreference(this: Backreference): EncodeResult {
  const pattern = `\\k<${this.name}>`;
  return { precedence: 'atom', pattern };
}
6 changes: 6 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
// Types
export type * from './types';
export type { CaptureOptions } from './constructs/capture';
export type { QuantifierOptions } from './constructs/quantifiers';
export type { RepeatOptions } from './constructs/repeat';

// Builders
export { buildPattern, buildRegExp } from './builders';

// Constructs
export {
endOfString,
nonWordBoundary,
Expand Down
13 changes: 9 additions & 4 deletions test-utils/to-equal-regex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@ import { wrapRegExp } from './utils';
export function toEqualRegex(
this: jest.MatcherContext,
received: RegExp | RegexSequence,
expected: RegExp,
expected: RegExp | string,
) {
received = wrapRegExp(received);

const options = {
isNot: this.isNot,
};

const expectedSource = typeof expected === 'string' ? expected : expected.source;
const expectedFlags = typeof expected === 'string' ? undefined : expected.flags;

return {
pass: expected.source === received.source && expected.flags === received.flags,
pass:
expectedSource === received.source &&
(expectedFlags === undefined || expectedFlags === received.flags),
message: () =>
this.utils.matcherHint('toHavePattern', undefined, undefined, options) +
this.utils.matcherHint('toEqualRegex', undefined, undefined, options) +
'\n\n' +
`Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` +
`Received: ${this.utils.printReceived(received)}`,
Expand All @@ -28,7 +33,7 @@ declare global {
namespace jest {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
interface Matchers<R, T = {}> {
toEqualRegex(expected: RegExp): R;
toEqualRegex(expected: RegExp | string): R;
}
}
}
36 changes: 36 additions & 0 deletions test-utils/to-match-all-named-groups.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import type { RegexSequence } from '../src/types';
import { wrapRegExp } from './utils';

/**
 * Jest matcher: asserts that running `String.prototype.matchAll` with the received
 * regex over `inputText` yields exactly the expected named groups, one entry per match.
 *
 * @param received - Regex, or regex sequence that `wrapRegExp` turns into a regex.
 * @param inputText - Text to search.
 * @param expectedGroups - Expected `groups` object of every match, in match order.
 *
 * Note: `matchAll` throws a `TypeError` when the regex lacks the global flag,
 * so the received regex must be built with `global: true`.
 */
export function toMatchAllNamedGroups(
  this: jest.MatcherContext,
  received: RegExp | RegexSequence,
  inputText: string,
  expectedGroups: Array<Record<string, string>>,
) {
  const receivedRegex = wrapRegExp(received);

  // `matchAll` always returns an iterator (never null), so no null fallback is
  // needed: zero matches simply produce an empty array.
  const receivedGroups = Array.from(inputText.matchAll(receivedRegex), (match) => match.groups);
  const options = {
    isNot: this.isNot,
  };

  return {
    pass: this.equals(receivedGroups, expectedGroups),
    message: () =>
      // Report this matcher's own name so failure output points at the right assertion.
      this.utils.matcherHint('toMatchAllNamedGroups', undefined, undefined, options) +
      '\n\n' +
      `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` +
      `Received: ${this.utils.printReceived(receivedGroups)}`,
  };
}

// Register the matcher with Jest so it is available on `expect(...)`.
expect.extend({ toMatchAllNamedGroups });

// Augment Jest's `Matchers` interface so TypeScript knows about the custom matcher.
declare global {
namespace jest {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
interface Matchers<R, T = {}> {
/** Checks the named groups of every match of the received regex in `inputText`. */
toMatchAllNamedGroups(inputText: string, expectedGroups: Array<Record<string, string>>): R;
}
}
}
6 changes: 3 additions & 3 deletions test-utils/to-match-groups.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ import { wrapRegExp } from './utils';
export function toMatchGroups(
this: jest.MatcherContext,
received: RegExp | RegexSequence,
expectedString: string,
inputText: string,
expectedGroups: string[],
) {
const receivedRegex = wrapRegExp(received);
const matchResult = expectedString.match(receivedRegex);
const matchResult = inputText.match(receivedRegex);
const receivedGroups = matchResult ? [...matchResult] : null;
const options = {
isNot: this.isNot,
Expand All @@ -30,7 +30,7 @@ declare global {
namespace jest {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
interface Matchers<R, T = {}> {
toMatchGroups(input: string, expected: string[]): R;
toMatchGroups(inputText: string, expectedGroups: string[]): R;
}
}
}
36 changes: 36 additions & 0 deletions test-utils/to-match-named-groups.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import type { RegexSequence } from '../src/types';
import { wrapRegExp } from './utils';

/**
 * Jest matcher: asserts that matching the received regex against `inputText` with
 * `String.prototype.match` yields exactly the expected named groups.
 *
 * @param received - Regex, or regex sequence that `wrapRegExp` turns into a regex.
 * @param inputText - Text to match against.
 * @param expectedGroups - Expected `groups` object of the match result.
 *
 * When there is no match the received value is `null`, so the assertion fails
 * (or passes under `.not`).
 */
export function toMatchNamedGroups(
  this: jest.MatcherContext,
  received: RegExp | RegexSequence,
  inputText: string,
  expectedGroups: Record<string, string>,
) {
  const receivedRegex = wrapRegExp(received);
  const matchResult = inputText.match(receivedRegex);
  const receivedGroups = matchResult ? matchResult.groups : null;
  const options = {
    isNot: this.isNot,
  };

  return {
    pass: this.equals(receivedGroups, expectedGroups),
    message: () =>
      // Report this matcher's own name so failure output points at the right assertion.
      this.utils.matcherHint('toMatchNamedGroups', undefined, undefined, options) +
      '\n\n' +
      `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` +
      `Received: ${this.utils.printReceived(receivedGroups)}`,
  };
}

// Register the matcher with Jest so it is available on `expect(...)`.
expect.extend({ toMatchNamedGroups });

// Augment Jest's `Matchers` interface so TypeScript knows about the custom matcher.
declare global {
namespace jest {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
interface Matchers<R, T = {}> {
/** Checks the named groups of the first match of the received regex in `inputText`. */
toMatchNamedGroups(inputText: string, expectedGroups: Record<string, string>): R;
}
}
}
Loading
Loading