@@ -453,28 +453,174 @@ export enum StripReturnTypeBehavior {
453453 DO_NOT_STRIP ,
454454}
455455
// All possible value separators, sorted by reverse length to ensure
// that we match the longer comma-prefixed variants first if they are present
const niceSeparators = [',', 'and', 'or', ', and', ', or'].sort((a, b) => b.length - a.length);
// Some string enums can also be objects; the final phrase is then "or an Object" and we
// should gracefully terminate in that case
const niceTerminators = [', or an Object', 'or an Object'].sort((a, b) => b.length - a.length);
// Annotations that may trail a value and are skipped instead of being parsed as a separator
const suffixesToIgnore = ['(Deprecated)'];

/**
 * Extracts an inline list of quoted string values from a description, e.g.
 * "Can be `a`, `b` or `c`." yields the values `a`, `b` and `c`.
 *
 * The text following "can be" / "value(s) include(s)" (case-insensitive) is
 * walked one character at a time by a small state machine that understands
 * quoted values (", ' or `), strikethrough (~) wrappers around a quoted value,
 * the separators in `niceSeparators`, the terminators in `niceTerminators`
 * and the ignorable suffixes in `suffixesToIgnore`.
 *
 * @param description - Free-form description text to scan.
 * @returns One entry per extracted value (each with an empty `description`),
 *   or `null` when the locator phrase is absent, the first character after it
 *   is not the start of a value, or no values were found.
 * @throws Error when the text after the locator phrase starts like a value
 *   list but then contains an unexpected token, a mismatched wrapper, or an
 *   unterminated quote.
 */
export const extractStringEnum = (description: string): PossibleStringValue[] | null => {
  const inlineValuesLocatorPattern = /(?:can be|values? includes?) (.+)/i;
  const locatorMatch = inlineValuesLocatorPattern.exec(description);
  if (!locatorMatch) return null;

  const valuesTokens = locatorMatch[1].split('');

  // Hoisted so the lookup lists are not re-created for every character
  const quoteChars = ['"', "'", '`'];
  const wrapperChars = ['~'];

  const state = {
    // Where we are in the valuesTokens array (index of the NEXT char to read)
    position: 0,
    // What values have we found so far
    values: [] as string[],
    // The current value we are building, was found wrapped by `currentQuoter`
    currentValue: '',
    // The quote character that we encountered to start building a value.
    // We won't stop adding characters to `currentValue` until the same character
    // is encountered again
    currentQuoter: null as null | string,
    // In some cases quoted values are wrapped with other markdown indicators, for
    // instance strikethrough ~ characters. This tracks those to ensure anything
    // we allow as a wrapping character is unwrapped after a value is extracted.
    currentQuoterWrappers: [] as string[],
    // This is set to true after a value is extracted to allow us to parse out a
    // nice separator. For instance a "comma"; the complete list is `niceSeparators`.
    expectingNiceSeparator: false,
    // This is set after the state machine reaches a point that _could_ be the end;
    // an invalid token while this is true is not a fatal error but rather the
    // graceful termination of the state machine.
    couldBeDone: false,
  };

  // Returns `length` characters starting AT the current character. `position`
  // has already been advanced past the current character, hence the `- 1`.
  const lookAhead = (length: number) => {
    return valuesTokens.slice(state.position - 1, state.position + length - 1).join('');
  };

  stringEnumTokenLoop: while (state.position < valuesTokens.length) {
    const char = valuesTokens[state.position];
    state.position++;

    if (state.currentQuoter) {
      // We should never expect a separator inside a quoted value
      if (state.expectingNiceSeparator) {
        throw new Error('Impossible state encountered while extracting a string enum');
      }
      if (char === state.currentQuoter) {
        // Closing quote: the value is complete
        state.currentQuoter = null;
        state.values.push(state.currentValue);
        state.currentValue = '';
        state.expectingNiceSeparator = true;
      } else {
        state.currentValue += char;
      }
    } else {
      // Whitespace can be skipped
      if (char === ' ') {
        continue stringEnumTokenLoop;
      }

      // If we're between values we should be expecting one of the "nice" separators.
      if (state.expectingNiceSeparator) {
        // Before checking for a separator we need to ensure we have unwrapped any
        // wrapping chars
        if (state.currentQuoterWrappers.length) {
          const expectedUnwrap = state.currentQuoterWrappers.pop();
          if (char !== expectedUnwrap) {
            // The context is printed as `Context: "` (10 chars) and `position` is
            // the 1-based index of `char`, so the caret belongs at 9 + position.
            throw new Error(
              `Unexpected token while extracting string enum. Expected an unwrapping token that matched "${expectedUnwrap}". But found token: ${char}\nContext: "${
                locatorMatch[1]
              }"\n${' '.repeat(9 + state.position)}^`,
            );
          }
          continue stringEnumTokenLoop;
        }

        // Sentence punctuation right after a value ends the list
        if (char === '.' || char === ';' || char === '-') {
          break stringEnumTokenLoop;
        }

        for (const suffix of suffixesToIgnore) {
          if (lookAhead(suffix.length) === suffix) {
            // Skip the rest of the suffix; still expecting a separator afterwards
            state.position += suffix.length - 1;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceTerminator of niceTerminators) {
          if (lookAhead(niceTerminator.length) === niceTerminator) {
            state.position += niceTerminator.length - 1;
            state.expectingNiceSeparator = false;
            state.couldBeDone = true;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceSeparator of niceSeparators) {
          if (lookAhead(niceSeparator.length) === niceSeparator) {
            state.position += niceSeparator.length - 1;
            state.expectingNiceSeparator = false;
            // A bare comma may be followed by prose rather than another value
            if (niceSeparator === ',') {
              state.couldBeDone = true;
            }
            continue stringEnumTokenLoop;
          }
        }

        // Context is printed unquoted here (`Context: ` is 9 chars), so 8 + position
        throw new Error(
          `Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "${char}"\nContext: ${
            locatorMatch[1]
          }\n${' '.repeat(8 + state.position)}^`,
        );
      }

      if (quoteChars.includes(char)) {
        // Quote chars start a new value
        state.currentQuoter = char;
        // A new value has started, we no longer could be done on an invalid char
        state.couldBeDone = false;
        continue stringEnumTokenLoop;
      }
      if (wrapperChars.includes(char)) {
        // Deprecated string enum values are wrapped with strikethrough
        state.currentQuoterWrappers.push(char);
        continue stringEnumTokenLoop;
      }
      // If we are at the very start we should just assume our heuristic found
      // something silly and bail, 0 valid characters is skip-able
      if (state.position === 1) {
        return null;
      }
      // If the last thing we parsed _could_ have been a termination character
      // let's assume an invalid character here confirms that.
      if (state.couldBeDone) {
        break stringEnumTokenLoop;
      }
      // Anything else is unexpected
      throw new Error(
        `Unexpected token while extracting string enum. Token: ${char}\nContext: "${
          locatorMatch[1]
        }"\n${' '.repeat(9 + state.position)}^`,
      );
    }
  }

  // Reached the end of the description, we should check
  // if we are in a clean state (not inside a quote).
  // If so we're good, if not hard error
  if (state.currentQuoter || state.currentValue) {
    throw new Error(
      `Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?\nContext: ${locatorMatch[1]}`,
    );
  }

  // No options we should just bail, can't have a string enum with 0 options
  if (!state.values.length) {
    return null;
  }

  return state.values.map(value => ({
    value,
    description: '',
  }));
};
479625
480626export const extractReturnType = (
0 commit comments