diff --git a/babel.config.js b/babel.config.js index 27db189..49b24a0 100644 --- a/babel.config.js +++ b/babel.config.js @@ -1,3 +1,3 @@ module.exports = { - presets: ['@babel/preset-env', '@babel/preset-typescript'], + presets: [['@babel/preset-env', { targets: { node: '10.0' } }], '@babel/preset-typescript'], }; diff --git a/docs/API.md b/docs/API.md index a738084..a0282cc 100644 --- a/docs/API.md +++ b/docs/API.md @@ -74,6 +74,41 @@ Captures, also known as capturing groups, extract and store parts of the matched > [!NOTE] > TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. E.g., `zeroOrMore(["abc"])` is encoded as `(?:abc)+`. +### `backreference()` + +```ts +function backreference( + groupNumber: number +): Backreference +``` + +Regex syntax: `\1 \2`. + +A backreference is a way to match the same text as previously matched by a capturing group. + +### `namedCapture()` + +```ts +function namedCapture( + sequence: RegexSequence, name: string +): NamedCapture +``` + +Regex syntax: `(?<name>...)`. + +A named capturing group is a capturing group that is given a name. The group's matching result can later be identified by this name. + +### `namedBackreference()` + +```ts +function namedBackreference( + groupName: string +): NamedBackreference +``` + +Regex syntax: `\k<groupName>`. + +A named backreference is a way to match the same text as previously matched by a named capturing group. 
### `lookahead()` ```ts diff --git a/src/__tests__/example-date.ts b/src/__tests__/example-date.ts new file mode 100644 index 0000000..f8a75b2 --- /dev/null +++ b/src/__tests__/example-date.ts @@ -0,0 +1,18 @@ +import { buildRegExp, digit, endOfString, namedCapture, repeat, startOfString } from '..'; + +// Example: dateRegex +const dateRegex = /^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/i; +const yearRegex = namedCapture(repeat(digit, 4), 'year'); +const monthRegex = namedCapture(repeat(digit, 2), 'month'); +const dayRegex = namedCapture(repeat(digit, 2), 'day'); +const regex = buildRegExp([startOfString, yearRegex, '-', monthRegex, '-', dayRegex, endOfString], { + ignoreCase: true, +}); + +test('dateRegex', () => { + expect(dateRegex).toEqual(regex); +}); + +test('dateRegex matching', () => { + expect(dateRegex).toMatchGroups('2021-08-24', ['2021-08-24', '2021', '08', '24']); +}); diff --git a/src/__tests__/example-email-advanced.ts b/src/__tests__/example-email-advanced.ts new file mode 100644 index 0000000..59fb280 --- /dev/null +++ b/src/__tests__/example-email-advanced.ts @@ -0,0 +1,90 @@ +import { + anyOf, + buildRegExp, + charClass, + charRange, + digit, + endOfString, + namedCapture, + oneOrMore, + repeat, + startOfString, +} from '..'; + +// +// Example: email validation building blocks +// +const upperCase = charRange('A', 'Z'); +const lowerCase = charRange('a', 'z'); +const specialChars = anyOf("!#$%&'*+/=?^_`{|}~-"); +const usernameChars = charClass(upperCase, lowerCase, digit, specialChars); +const hostnameChars = charClass(upperCase, lowerCase, digit, specialChars); +const domainChars = charRange('a', 'z'); +const emailSeparator = anyOf('.'); +const domainSeparator = anyOf('@'); + +// +// Example: email validation major components using named capture. 
+// +const username = namedCapture(oneOrMore(usernameChars), 'username'); + +const usernameRegex = buildRegExp([startOfString, username, endOfString]); + +test('Matching the Username component.', () => { + expect(usernameRegex).toMatchString('john1234'); + expect(usernameRegex).toMatchString('ringo$1234'); + expect(usernameRegex).not.toMatchString('john@1234'); + expect(usernameRegex).not.toMatchString('george.harrison'); + expect(usernameRegex).not.toMatchString('paul.mccartney&wings'); + expect(usernameRegex).not.toMatchString('ringo starr'); +}); + +const hostname = namedCapture(oneOrMore(hostnameChars), 'hostname'); + +const hostnameRegex = buildRegExp([startOfString, hostname, endOfString]); + +test('Matching the Hostname component.', () => { + expect(hostnameRegex).toMatchString('gmail'); + expect(hostnameRegex).toMatchString('google'); + expect(hostnameRegex).toMatchString('g-mail'); + expect(hostnameRegex).toMatchString('g_mail'); + expect(hostnameRegex).not.toMatchString('g mail'); + expect(hostnameRegex).not.toMatchString('g.mail'); +}); + +const domain = namedCapture(repeat(domainChars, { min: 2 }), 'domain'); + +const domainRegex = buildRegExp([startOfString, domain, endOfString]); + +test('Matching the Domain component.', () => { + expect(domainRegex).toMatchString('com'); + expect(domainRegex).toMatchString('org'); + expect(domainRegex).not.toMatchString('c'); + expect(domainRegex).not.toMatchString('o'); + expect(domainRegex).toMatchString('co'); +}); + +test('example: email validation', () => { + const regex = buildRegExp( + [startOfString, username, domainSeparator, hostname, emailSeparator, domain, endOfString], + { ignoreCase: true }, + ); + + expect(regex).toMatchString('aaa@gmail.co'); + expect(regex).toMatchString('aaa@gmail.com'); + expect(regex).toMatchString('Aaa@GMail.Com'); + expect(regex).not.toMatchString('aaa@long.domain.example.com'); + + expect(regex).not.toMatchString('@'); + expect(regex).not.toMatchString('aaa@'); + 
expect(regex).not.toMatchString('a@gmail.c'); + expect(regex).not.toMatchString('@gmail.com'); + + const emailAddress = 'abba@gold.com'; + const match = regex.exec(emailAddress); + expect(match).not.toBeNull(); + expect(match?.groups).toBeDefined(); + expect(match?.groups?.username).toBe('abba'); + expect(match?.groups?.hostname).toBe('gold'); + expect(match?.groups?.domain).toBe('com'); +}); diff --git a/src/__tests__/example-url-advanced.ts b/src/__tests__/example-url-advanced.ts index db13913..ca47490 100644 --- a/src/__tests__/example-url-advanced.ts +++ b/src/__tests__/example-url-advanced.ts @@ -22,8 +22,8 @@ const uppercase = charRange('A', 'Z'); const hyphen = anyOf('-'); const alphabetical = charClass(lowercase, uppercase); const specialChars = anyOf('._%+-'); -const portSeperator = ':'; -const schemeSeperator = ':'; +const portSeparator = ':'; +const schemeSeparator = ':'; const doubleSlash = '//'; const at = '@'; const pathSeparator = '/'; @@ -63,7 +63,7 @@ const userInfo = oneOrMore(usernameChars); const hostname = repeat(hostnameChars, { min: 1, max: 63 }); const hostnameEnd = capture([hostname, endOfString]); const host = capture([oneOrMore([hostname, '.'])]); -const port = [portSeperator, oneOrMore(digit)]; +const port = [portSeparator, oneOrMore(digit)]; const authority = [doubleSlash, optional([userInfo, at]), hostname, optional(port)]; const authorityRegex = buildRegExp([startOfString, ...authority, endOfString], { @@ -162,7 +162,7 @@ const urlRegex = buildRegExp( startOfString, capture([ optional(scheme), - schemeSeperator, + schemeSeparator, optional(authority), path, optional(query), diff --git a/src/constructs/__tests__/backreference.test.tsx b/src/constructs/__tests__/backreference.test.tsx new file mode 100644 index 0000000..093ad07 --- /dev/null +++ b/src/constructs/__tests__/backreference.test.tsx @@ -0,0 +1,75 @@ +import { backreference, buildRegExp, capture } from '../..'; + +describe('backreference function', () => { + it('should 
create a backreference to a previously captured group', () => { + const group = capture('a'); + const backRef = backreference(1); + const groupRegex = buildRegExp([group, backRef]); + + const match = groupRegex.exec('aa'); + expect(match).not.toBeNull(); + expect(match?.[0]).toBe('aa'); + }); + + it('should not match when the backreference does not match the captured group', () => { + const group = capture('a'); + const backRef = backreference(1); + const groupRegex = buildRegExp([group, backRef]); + + const match = groupRegex.exec('a\\1'); + expect(match).toBeNull(); + }); + + it('should not match when the captured groups are not repeated in order', () => { + const group1 = capture('a'); + const group2 = capture('b'); + const backRef1 = backreference(1); + const backRef2 = backreference(2); + const groupRegex = buildRegExp([group1, group2, backRef1, backRef2]); + const match = groupRegex.exec('aabb'); + expect(match).toBeNull(); + }); + + it('should handle multiple valid backreferences', () => { + const group1 = capture('a'); + const group2 = capture('b'); + const backRef1 = backreference(1); + const backRef2 = backreference(2); + const groupRegex = buildRegExp([group1, group2, backRef1, backRef2]); + + const match = groupRegex.exec('abab'); + expect(match).not.toBeNull(); + }); +}); + +it('should handle backreferences in different order', () => { + const group1 = capture('a'); + const group2 = capture('b'); + const backRef1 = backreference(2); + const backRef2 = backreference(1); + const groupRegex = buildRegExp([group1, group2, backRef1, backRef2]); + + const match = groupRegex.exec('abba'); + expect(match).not.toBeNull(); + expect(match?.[0]).toBe('abba'); +}); + +it('should not match when the backreference does not match the captured group', () => { + const group = capture('a'); + const backRef = backreference(1); + const groupRegex = buildRegExp([group, backRef]); + + const match = groupRegex.exec('abba'); + expect(match).toBeNull(); +}); + +it('should handle multiple 
backreferences to the same group', () => { + const group1 = capture('a'); + const backRef1 = backreference(1); + const backRef2 = backreference(1); + const groupRegex = buildRegExp([group1, backRef1, backRef2]); + + const match = groupRegex.exec('aaa'); + expect(match).not.toBeNull(); + expect(match?.[0]).toBe('aaa'); +}); diff --git a/src/constructs/__tests__/named-backreference.test.tsx b/src/constructs/__tests__/named-backreference.test.tsx new file mode 100644 index 0000000..3d102ca --- /dev/null +++ b/src/constructs/__tests__/named-backreference.test.tsx @@ -0,0 +1,43 @@ +import { buildRegExp, namedBackreference, namedCapture } from '../..'; + +describe('named-backreference function', () => { + it('should create a backreference to a previously captured group', () => { + const group = namedCapture('a', 'groupA'); + const groupRef = namedBackreference('groupA'); + const groupRegex = buildRegExp([group, groupRef]); + + const match = groupRegex.exec('aa'); + expect(match).not.toBeNull(); + expect(match?.[0]).toBe('aa'); + }); + + it('should not match when the backreference does not match the captured group', () => { + const group = namedCapture('a', 'groupA'); + const groupRef = namedBackreference('groupA'); + const groupRegex = buildRegExp([group, groupRef]); + + const match = groupRegex.exec('a\\1'); + expect(match).toBeNull(); + }); + + it('should allow references in multiple backreferences', () => { + const group1 = namedCapture('a', 'groupA'); + const group2 = namedCapture('b', 'groupB'); + const groupARef = namedBackreference('groupA'); + const groupBRef = namedBackreference('groupB'); + const groupRegex = buildRegExp([group1, group2, groupARef, groupBRef]); + + const match = groupRegex.exec('aabb'); + expect(match).toBeNull(); + }); + + it('should handle multiple valid backreferences', () => { + const group1 = namedCapture('ab', 'groupA'); + const group2 = namedCapture('ba', 'groupB'); + const groupARef = namedBackreference('groupA'); + const groupBRef = 
namedBackreference('groupB'); + const groupRegex = buildRegExp([group1, group2, groupARef, groupBRef]); + const match = groupRegex.exec('abbaabba'); + expect(match).not.toBeNull(); + }); +}); diff --git a/src/constructs/__tests__/named-capture.test.tsx b/src/constructs/__tests__/named-capture.test.tsx new file mode 100644 index 0000000..f4e8d4e --- /dev/null +++ b/src/constructs/__tests__/named-capture.test.tsx @@ -0,0 +1,49 @@ +import { buildRegExp, namedCapture, oneOrMore } from '../..'; + +describe('namedCapture function', () => { + it('should create a named capture group', () => { + const regex = buildRegExp(namedCapture('a', 'group1')); + const match = regex.exec('a'); + expect(match?.groups?.group1).toBe('a'); + }); + + it('should not match when the named capture group does not match the input', () => { + const regex = buildRegExp(namedCapture('a', 'group1')); + const match = regex.exec('b'); + expect(match).toBeNull(); + }); + + it('should handle multiple named capture groups', () => { + const regex = buildRegExp([namedCapture('a', 'group1'), namedCapture('b', 'group2')]); + const match = regex.exec('ab'); + expect(match).not.toBeNull(); + expect(match?.groups?.group1).toBe('a'); + expect(match?.groups?.group2).toBe('b'); + }); + + it('should handle nested named capture groups', () => { + const regex = buildRegExp(namedCapture(['a', namedCapture('b', 'group2')], 'group1')); + const match = regex.exec('ab'); + expect(match?.groups?.group1).toBe('ab'); + expect(match?.groups?.group2).toBe('b'); + }); +}); + +describe('namedCapture RegEx matching', () => { + test('`named-capture` pattern', () => { + expect(namedCapture('a', 'abba')).toEqualRegex(/(?<abba>a)/); + expect(namedCapture('abc', 'abc')).toEqualRegex(/(?<abc>abc)/); + expect(namedCapture(oneOrMore('abc'), 'ababab')).toEqualRegex(/(?<ababab>(?:abc)+)/); + expect(oneOrMore(namedCapture('abc', 'abacab'))).toEqualRegex(/(?<abacab>abc)+/); + }); + + test('`named-capture` matching', () => { + expect(namedCapture('b', 
'b')).toMatchGroups('ab', ['b', 'b']); + expect(['a', namedCapture('b', 'b')]).toMatchGroups('ab', ['ab', 'b']); + expect(['a', namedCapture('b', 'b'), namedCapture('c', 'c')]).toMatchGroups('abc', [ + 'abc', + 'b', + 'c', + ]); + }); +}); diff --git a/src/constructs/backreference.ts b/src/constructs/backreference.ts new file mode 100644 index 0000000..51bface --- /dev/null +++ b/src/constructs/backreference.ts @@ -0,0 +1,25 @@ +import type { EncodeResult } from '../encoder/types'; +import type { GroupNumber, RegexConstruct } from '../types'; + +/** Construct referring back to a numbered capturing group. */ +export interface Backreference extends RegexConstruct { + type: 'backreference'; + group: GroupNumber; +} + +/** Creates a backreference that matches the same text as capturing group `groupNumber`. */ +export function backreference(groupNumber: GroupNumber): Backreference { + return { + type: 'backreference', + group: groupNumber, + encode: encodeBackreference, + }; +} + +/** Encodes the construct as the atom `\N`, where N is the referenced group number. */ +function encodeBackreference(this: Backreference): EncodeResult { + return { + precedence: 'atom', + pattern: `\\${this.group}`, + }; +} diff --git a/src/constructs/named-backreference.ts b/src/constructs/named-backreference.ts new file mode 100644 index 0000000..3542dd7 --- /dev/null +++ b/src/constructs/named-backreference.ts @@ -0,0 +1,25 @@ +import type { EncodeResult } from '../encoder/types'; +import type { RegexConstruct } from '../types'; + +/** Construct referring back to a named capturing group. */ +export interface NamedBackreference extends RegexConstruct { + type: 'named-backreference'; + name: string; +} + +/** Creates a backreference that matches the same text as the capturing group named `groupName`. */ +export function namedBackreference(groupName: string): NamedBackreference { + return { + type: 'named-backreference', + name: groupName, + encode: encodeNamedBackreference, + }; +} + +/** Encodes the construct as the atom `\k<name>`. */ +function encodeNamedBackreference(this: NamedBackreference): EncodeResult { + return { + precedence: 'atom', + pattern: `\\k<${this.name}>`, + }; +} diff --git a/src/constructs/named-capture.ts b/src/constructs/named-capture.ts new file mode 100644 index 0000000..654f06c --- /dev/null +++ b/src/constructs/named-capture.ts @@ -0,0 +1,26 @@ 
+import { encodeSequence } from '../encoder/encoder'; +import type { EncodeResult } from '../encoder/types'; +import { ensureArray } from '../utils/elements'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; + +export interface NamedCapture extends RegexConstruct { + type: 'named-capture'; + name: string; + children: RegexElement[]; +} + +export function namedCapture(sequence: RegexSequence, name: string): NamedCapture { + return { + type: 'named-capture', + name, + children: ensureArray(sequence), + encode: encodeNamedCapture, + }; +} + +function encodeNamedCapture(this: NamedCapture): EncodeResult { + return { + precedence: 'atom', + pattern: `(?<${this.name}>${encodeSequence(this.children).pattern})`, + }; +} diff --git a/src/index.ts b/src/index.ts index 1ba53a5..f1fa441 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,6 +4,9 @@ export { buildPattern, buildRegExp } from './builders'; export { endOfString, notWordBoundary, startOfString, wordBoundary } from './constructs/anchors'; export { capture } from './constructs/capture'; +export { namedCapture } from './constructs/named-capture'; +export { backreference } from './constructs/backreference'; +export { namedBackreference } from './constructs/named-backreference'; export { any, anyOf, diff --git a/src/types.ts b/src/types.ts index 24a63c3..0de614e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -35,3 +35,5 @@ export interface RegexFlags { /** Penerate the start and end indices of each captured group in a match. */ hasIndices?: boolean; } +/** Capturing-group numbers accepted by `backreference()`: 1 through 9. */ +export type GroupNumber = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9;