From 958313742c7e505afa75fc49f2b342c47fc94896 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Mon, 29 Apr 2024 23:43:30 +0200 Subject: [PATCH 1/3] simplify char class --- src/constructs/character-class.ts | 55 +++++++++---------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/src/constructs/character-class.ts b/src/constructs/character-class.ts index 93754e0..d6d96a2 100644 --- a/src/constructs/character-class.ts +++ b/src/constructs/character-class.ts @@ -1,15 +1,6 @@ import type { EncodeResult } from '../encoder/types'; import type { RegexConstruct } from '../types'; -export interface CharacterClass extends RegexConstruct { - type: 'characterClass'; - escape?: string; - chars: string[]; - ranges: CharacterRange[]; - isNegated: boolean; - encode: () => EncodeResult; -} - /** * Character range from start to end (inclusive). */ @@ -18,6 +9,14 @@ export interface CharacterRange { end: string; } +export interface CharacterClass extends RegexConstruct { + type: 'characterClass'; + escape?: string; + chars?: string[]; + ranges?: CharacterRange[]; + isNegated?: boolean; +} + /** * Matches any single character. * Cannot be used as a part of character class. @@ -30,54 +29,36 @@ export const any: EncodeResult = { export const digit: CharacterClass = { type: 'characterClass', escape: '\\d', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; export const nonDigit: CharacterClass = { type: 'characterClass', escape: '\\D', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; export const word: CharacterClass = { type: 'characterClass', escape: '\\w', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; export const nonWord: CharacterClass = { type: 'characterClass', escape: '\\W', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; export const whitespace: CharacterClass = { type: 'characterClass', escape: '\\s', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; export const nonWhitespace: CharacterClass = { type: 'characterClass', escape: '\\S', - chars: [], - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; @@ -108,8 +89,7 @@ export function charClass(...elements: CharacterClass[]): CharacterClass { return { type: 'characterClass', chars: elements.map((c) => getAllChars(c)).flat(), - ranges: elements.map((c) => c.ranges).flat(), - isNegated: false, + ranges: elements.map((c) => c.ranges ?? []).flat(), encode: encodeCharacterClass, }; } @@ -129,9 +109,7 @@ export function charRange(start: string, end: string): CharacterClass { return { type: 'characterClass', - chars: [], ranges: [{ start, end }], - isNegated: false, encode: encodeCharacterClass, }; } @@ -146,8 +124,6 @@ export function anyOf(characters: string): CharacterClass { return { type: 'characterClass', chars, - ranges: [], - isNegated: false, encode: encodeCharacterClass, }; } @@ -155,6 +131,7 @@ export function anyOf(characters: string): CharacterClass { export function negated(element: CharacterClass): CharacterClass { return { type: 'characterClass', + escape: element.escape, chars: element.chars, ranges: element.ranges, isNegated: !element.isNegated, @@ -168,12 +145,12 @@ export function negated(element: CharacterClass): CharacterClass { export const inverted = negated; function encodeCharacterClass(this: CharacterClass): EncodeResult { - if (this.escape === undefined && this.chars.length === 0 && this.ranges.length === 0) { + if (this.escape === undefined && !this.chars?.length && !this.ranges?.length) { throw new Error('Character class should contain at least one character or character range'); } - // Direct rendering for single-character class - if (this.escape !== undefined && !this.chars.length && !this.ranges.length && !this.isNegated) { + // Direct rendering for escapes + if (this.escape !== undefined && !this.chars?.length && !this.ranges?.length && !this.isNegated) { return { precedence: 'atom', pattern: this.escape, @@ -188,7 +165,7 @@ function encodeCharacterClass(this: CharacterClass): EncodeResult { const hyphen = allChars.includes('-') ? '-' : ''; const caret = allChars.includes('^') ? '^' : ''; const otherChars = allChars.filter((c) => c !== '-' && c !== '^').join(''); - const ranges = this.ranges.map(({ start, end }) => `${start}-${end}`).join(''); + const ranges = this.ranges?.map(({ start, end }) => `${start}-${end}`).join('') ?? ''; const negation = this.isNegated ? '^' : ''; let pattern = `[${negation}${ranges}${otherChars}${caret}${hyphen}]`; @@ -206,8 +183,8 @@ function escapeForCharacterClass(text: string): string { function getAllChars(characterClass: CharacterClass) { if (characterClass.escape === undefined) { - return characterClass.chars; + return characterClass.chars ?? []; } - return [characterClass.escape, ...characterClass.chars]; + return [characterClass.escape, ...(characterClass.chars ?? [])]; } From a99bcd0d1dcb1fc9b08bf9f1c6f5b4789553a71e Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 30 Apr 2024 12:04:20 +0200 Subject: [PATCH 2/3] tweaks --- src/constructs/character-class.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/constructs/character-class.ts b/src/constructs/character-class.ts index d6d96a2..74bc5ad 100644 --- a/src/constructs/character-class.ts +++ b/src/constructs/character-class.ts @@ -157,7 +157,7 @@ function encodeCharacterClass(this: CharacterClass): EncodeResult { }; } - const allChars = getAllChars(this); + const allChars = getAllChars(this) ?? []; // If passed characters includes hyphen (`-`) it need to be moved to // first (or last) place in order to treat it as hyphen character and not a range. @@ -183,7 +183,7 @@ function escapeForCharacterClass(text: string): string { function getAllChars(characterClass: CharacterClass) { if (characterClass.escape === undefined) { - return characterClass.chars ?? []; + return characterClass.chars; } return [characterClass.escape, ...(characterClass.chars ?? [])]; From 19e5820ae29583ccde24b72b89ef0843a24e7fc9 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 30 Apr 2024 12:07:10 +0200 Subject: [PATCH 3/3] chore: fix ts --- src/constructs/character-class.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constructs/character-class.ts b/src/constructs/character-class.ts index 74bc5ad..559624a 100644 --- a/src/constructs/character-class.ts +++ b/src/constructs/character-class.ts @@ -88,7 +88,7 @@ export function charClass(...elements: CharacterClass[]): CharacterClass { return { type: 'characterClass', - chars: elements.map((c) => getAllChars(c)).flat(), + chars: elements.map((c) => getAllChars(c) ?? []).flat(), ranges: elements.map((c) => c.ranges ?? []).flat(), encode: encodeCharacterClass, };