From 264d1239654f9fa001a37fd65bdb1a3974a6caca Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Mon, 23 Sep 2024 20:33:40 +0200 Subject: [PATCH] feat: null element support --- src/builders.ts | 12 ++++++--- src/constructs/__tests__/char-class.test.ts | 8 ++---- src/constructs/__tests__/choice-of.test.ts | 6 ++--- src/constructs/__tests__/encoder.test.tsx | 6 ++--- src/constructs/__tests__/repeat.test.tsx | 4 +-- src/constructs/capture.ts | 12 ++++++--- src/constructs/char-class.ts | 24 ++++++++++-------- src/constructs/choice-of.ts | 6 ++--- src/constructs/lookahead.ts | 8 +++++- src/constructs/lookbehind.ts | 8 +++++- src/constructs/negative-lookahead.ts | 8 +++++- src/constructs/negative-lookbehind.ts | 9 +++++-- src/constructs/quantifiers.ts | 27 ++++++++++++++++++--- src/constructs/repeat.ts | 5 +++- src/encoder.ts | 27 +++++++++++++++------ src/types.ts | 4 +-- src/utils.ts | 13 +--------- 17 files changed, 121 insertions(+), 66 deletions(-) diff --git a/src/builders.ts b/src/builders.ts index f65c7fb..cb345a2 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -8,8 +8,12 @@ import { encode } from './encoder'; * @param flags RegExp flags object * @returns RegExp object */ -export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp { - const pattern = encode(sequence).pattern; +export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp | undefined { + const pattern = encode(sequence)?.pattern; + if (!pattern) { + return undefined; + } + ensureUnicodeFlagIfNeeded(pattern, flags); const flagsString = encodeFlags(flags ?? {}); @@ -21,8 +25,8 @@ export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp * @param elements Single regex element or array of elements * @returns regex pattern string */ -export function buildPattern(sequence: RegexSequence): string { - return encode(sequence).pattern; +export function buildPattern(sequence: RegexSequence): string | undefined { + return encode(sequence)?.pattern; } function encodeFlags(flags: RegexFlags): string { diff --git a/src/constructs/__tests__/char-class.test.ts b/src/constructs/__tests__/char-class.test.ts index 203c96e..3b10d22 100644 --- a/src/constructs/__tests__/char-class.test.ts +++ b/src/constructs/__tests__/char-class.test.ts @@ -43,8 +43,8 @@ test('`charClass` joins character escapes', () => { expect(charClass(word, nonDigit)).toEqualRegex(/[\w\D]/); }); -test('`charClass` throws on empty text', () => { - expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`); +test('`charClass` on empty text', () => { + expect(charClass()).toBeNull(); }); test('`charRange` pattern', () => { @@ -96,10 +96,6 @@ test('`anyOf` handles basic cases pattern', () => { expect(['x', anyOf('ab'), 'x']).toEqualRegex(/x[ab]x/); }); -test('`anyOf` throws on empty text', () => { - expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(`"Expected at least one character"`); -}); - test('`anyOf` pattern with quantifiers', () => { expect(['x', oneOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]+x/); expect(['x', optional(anyOf('abc')), 'x']).toEqualRegex(/x[abc]?x/); diff --git a/src/constructs/__tests__/choice-of.test.ts b/src/constructs/__tests__/choice-of.test.ts index 3d3a2e5..9fe5fc4 100644 --- a/src/constructs/__tests__/choice-of.test.ts +++ b/src/constructs/__tests__/choice-of.test.ts @@ -32,8 +32,6 @@ test('`choiceOf` pattern using nested regex', () => { ); }); -test('`choiceOf` throws on empty options', () => { - expect(() => choiceOf()).toThrowErrorMatchingInlineSnapshot( - `"Expected at least one alternative"`, - ); +test('`choiceOf` on empty options', () => { + expect(choiceOf()).toBeNull(); }); diff --git a/src/constructs/__tests__/encoder.test.tsx b/src/constructs/__tests__/encoder.test.tsx index 9ba7c06..6c3e973 100644 --- a/src/constructs/__tests__/encoder.test.tsx +++ b/src/constructs/__tests__/encoder.test.tsx @@ -81,8 +81,6 @@ test('`buildRegExp` throws error on unknown element', () => { `); }); -test('`buildPattern` throws on empty text', () => { - expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot( - `"Expected at least one character"`, - ); +test('`buildPattern` on empty text', () => { + expect(buildPattern('')).toBeUndefined(); }); diff --git a/src/constructs/__tests__/repeat.test.tsx b/src/constructs/__tests__/repeat.test.tsx index c7158aa..e081f5a 100644 --- a/src/constructs/__tests__/repeat.test.tsx +++ b/src/constructs/__tests__/repeat.test.tsx @@ -15,8 +15,8 @@ test('`repeat` pattern optimizes grouping for atoms', () => { expect(repeat(digit, { min: 1, max: 5 })).toEqualRegex(/\d{1,5}/); }); -test('`repeat` throws on no children', () => { - expect(() => repeat([], 1)).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`); +test('`repeat` accepts no children', () => { + expect(repeat([], 1)).toBeNull(); }); test('greedy `repeat` quantifier pattern', () => { diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index 3814866..9247637 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -1,5 +1,6 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +import { ensureElements } from '../utils'; export type CaptureOptions = { /** @@ -17,18 +18,23 @@ export interface Reference extends EncodedRegex { * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`) * - in the regex itself, through {@link ref} */ -export function capture(sequence: RegexSequence, options?: CaptureOptions): EncodedRegex { +export function capture(sequence: RegexSequence, options?: CaptureOptions): EncodedRegex | null { + const elements = ensureElements(sequence); + if (elements.length === 0) { + return null; + } + const name = options?.name; if (name) { return { precedence: 'atom', - pattern: `(?<${name}>${encode(sequence).pattern})`, + pattern: `(?<${name}>${encode(elements).pattern})`, }; } return { precedence: 'atom', - pattern: `(${encode(sequence).pattern})`, + pattern: `(${encode(elements).pattern})`, }; } diff --git a/src/constructs/char-class.ts b/src/constructs/char-class.ts index a2b475d..df22d49 100644 --- a/src/constructs/char-class.ts +++ b/src/constructs/char-class.ts @@ -1,5 +1,4 @@ import type { CharacterClass, CharacterEscape, EncodedRegex } from '../types'; -import { ensureText } from '../utils'; /** * Creates a character class which matches any one of the given characters. @@ -7,13 +6,16 @@ import { ensureText } from '../utils'; * @param elements - Member characters or character ranges. * @returns Character class. */ -export function charClass(...elements: Array): CharacterClass { - if (!elements.length) { - throw new Error('Expected at least one element'); +export function charClass( + ...elements: Array +): CharacterClass | null { + const allElements = elements.flatMap((c) => c?.elements).filter((c) => c != null); + if (allElements.length === 0) { + return null; } return { - elements: elements.map((c) => c.elements).flat(), + elements: allElements, encode: encodeCharClass, }; } @@ -46,9 +48,7 @@ export function charRange(start: string, end: string): CharacterClass { * @param chars - Characters to match. * @returns Character class. */ -export function anyOf(chars: string): CharacterClass { - ensureText(chars); - +export function anyOf(chars: string): CharacterClass | null { return { elements: chars.split('').map(escapeChar), encode: encodeCharClass, @@ -61,7 +61,7 @@ export function anyOf(chars: string): CharacterClass { * @param element - Character class or character escape to negate. * @returns Negated character class. */ -export function negated(element: CharacterClass | CharacterEscape): EncodedRegex { +export function negated(element: CharacterClass | CharacterEscape): EncodedRegex | null { return encodeCharClass.call(element, true); } @@ -79,7 +79,11 @@ function escapeChar(text: string): string { function encodeCharClass( this: CharacterClass | CharacterEscape, isNegated?: boolean, -): EncodedRegex { +): EncodedRegex | null { + if (this.elements.length === 0) { + return null; + } + return { precedence: 'atom', pattern: `[${isNegated ? '^' : ''}${this.elements.join('')}]`, diff --git a/src/constructs/choice-of.ts b/src/constructs/choice-of.ts index 0434174..01247fe 100644 --- a/src/constructs/choice-of.ts +++ b/src/constructs/choice-of.ts @@ -7,12 +7,12 @@ import type { EncodedRegex, RegexSequence } from '../types'; * @param alternatives - Alternatives to choose from. * @returns Choice of alternatives. */ -export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex { +export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex | null { if (alternatives.length === 0) { - throw new Error('Expected at least one alternative'); + return null; } - const encodedAlternatives = alternatives.map((c) => encode(c)); + const encodedAlternatives = alternatives.map((c) => encode(c)).filter((c) => c != null); if (encodedAlternatives.length === 1) { return encodedAlternatives[0]!; } diff --git a/src/constructs/lookahead.ts b/src/constructs/lookahead.ts index 6180033..ffe7de4 100644 --- a/src/constructs/lookahead.ts +++ b/src/constructs/lookahead.ts @@ -1,5 +1,6 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +import { ensureElements } from '../utils'; /** * Positive lookahead assertion. @@ -15,7 +16,12 @@ import type { EncodedRegex, RegexSequence } from '../types'; * // /(?=abc)/ * ``` */ -export function lookahead(sequence: RegexSequence): EncodedRegex { +export function lookahead(sequence: RegexSequence): EncodedRegex | null { + const elements = ensureElements(sequence); + if (elements.length === 0) { + return null; + } + return { precedence: 'atom', pattern: `(?=${encode(sequence).pattern})`, diff --git a/src/constructs/lookbehind.ts b/src/constructs/lookbehind.ts index 9187bed..7c86469 100644 --- a/src/constructs/lookbehind.ts +++ b/src/constructs/lookbehind.ts @@ -1,5 +1,6 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +import { ensureElements } from '../utils'; /** * Positive lookbehind assertion. @@ -15,7 +16,12 @@ import type { EncodedRegex, RegexSequence } from '../types'; * // /(?<=abc)/ * ``` */ -export function lookbehind(sequence: RegexSequence): EncodedRegex { +export function lookbehind(sequence: RegexSequence): EncodedRegex | null { + const elements = ensureElements(sequence); + if (elements.length === 0) { + return null; + } + return { precedence: 'atom', pattern: `(?<=${encode(sequence).pattern})`, diff --git a/src/constructs/negative-lookahead.ts b/src/constructs/negative-lookahead.ts index 5694ca6..5c19ffc 100644 --- a/src/constructs/negative-lookahead.ts +++ b/src/constructs/negative-lookahead.ts @@ -1,5 +1,6 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +import { ensureElements } from '../utils'; /** * Negative lookahead assertion. @@ -15,7 +16,12 @@ import type { EncodedRegex, RegexSequence } from '../types'; * // /(?=abc)/ * ``` */ -export function negativeLookahead(sequence: RegexSequence): EncodedRegex { +export function negativeLookahead(sequence: RegexSequence): EncodedRegex | null { + const elements = ensureElements(sequence); + if (elements.length === 0) { + return null; + } + return { precedence: 'atom', pattern: `(?!${encode(sequence).pattern})`, diff --git a/src/constructs/negative-lookbehind.ts b/src/constructs/negative-lookbehind.ts index b0264f3..879442a 100644 --- a/src/constructs/negative-lookbehind.ts +++ b/src/constructs/negative-lookbehind.ts @@ -1,6 +1,6 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; - +import { ensureElements } from '../utils'; /** * Negative lookbehind assertion. * @@ -15,7 +15,12 @@ import type { EncodedRegex, RegexSequence } from '../types'; * // /(? encodeElement(n)); + const encoded = elements.map((n) => encodeElement(n)).filter((n) => n != null); + if (encoded.length === 0) { + return null; + } if (encoded.length === 1) { return encoded[0]!; @@ -17,12 +20,20 @@ export function encode(sequence: RegexSequence): EncodedRegex { }; } -export function encodeAtomic(sequence: RegexSequence): string { +export function encodeAtomic(sequence: RegexSequence): string | null { const encoded = encode(sequence); + if (encoded == null) { + return null; + } + return encoded.precedence === 'atom' ? encoded.pattern : `(?:${encoded.pattern})`; } -function encodeElement(element: RegexElement): EncodedRegex { +function encodeElement(element: RegexElement): EncodedRegex | null { + if (element == null) { + return null; + } + if (typeof element === 'string') { return encodeText(element); } @@ -46,8 +57,10 @@ function encodeElement(element: RegexElement): EncodedRegex { throw new Error(`Unsupported element. Received: ${JSON.stringify(element, null, 2)}`); } -function encodeText(text: string): EncodedRegex { - ensureText(text); +function encodeText(text: string): EncodedRegex | null { + if (text.length === 0) { + return null; + } return { // Optimize for single character case diff --git a/src/types.ts b/src/types.ts index 4a4b056..2032a18 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,7 +10,7 @@ export type RegexSequence = RegexElement[] | RegexElement; /** * Fundamental building block of a regular expression, defined as either a regex construct, `RegExp` object or a string. */ -export type RegexElement = RegexConstruct | RegExp | string; +export type RegexElement = RegexConstruct | RegExp | string | null; /** * Fundamental building block of a regular expression, defined as either an encoded regex or a character class. @@ -34,7 +34,7 @@ export type EncodePrecedence = 'atom' | 'sequence' | 'disjunction'; * Regex patter that can be encoded by calling the `encode` method. */ export interface LazyEncodableRegex { - encode: () => EncodedRegex; + encode: () => EncodedRegex | null; } /** diff --git a/src/utils.ts b/src/utils.ts index 5bc9232..d905ecc 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,16 +1,5 @@ import type { RegexElement, RegexSequence } from './types'; export function ensureElements(sequence: RegexSequence): RegexElement[] { - const elements = Array.isArray(sequence) ? sequence : [sequence]; - if (elements.length === 0) { - throw new Error('Expected at least one element'); - } - - return elements; -} - -export function ensureText(text: string): void { - if (text.length === 0) { - throw new Error('Expected at least one character'); - } + return Array.isArray(sequence) ? sequence : [sequence]; }