Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: support double quote string enum #122

Merged
merged 5 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions src/__tests__/markdown-helpers.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,76 @@ def fn():
expect(extractStringEnum('wassup')).toBe(null);
});

// A word that is not a recognised separator ("sometimes") sits between two quoted
// values. The extractor is expected to throw, naming the first offending character
// ("s") and echoing the text being parsed with a caret line underneath.
it('should error helpfully on invalid value separators', () => {
expect(() => extractStringEnum('Can be `x` sometimes `y'))
.toThrowErrorMatchingInlineSnapshot(`
"Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "s"
Context: \`x\` sometimes \`y
^"
`);
});

// The final value opens a backtick quote that is never closed. The extractor is
// expected to throw an "early termination" error that echoes the text being parsed.
it('should error helpfully on unterminated enum strings', () => {
expect(() => extractStringEnum('Can be `x` or `y')).toThrowErrorMatchingInlineSnapshot(`
"Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?
Context: \`x\` or \`y"
`);
});

describe('mixed ticks', () => {
  // Each value is delimited by the quote character that opened it, so a different
  // quote character appearing inside the value is expected to be kept verbatim.
  it('should extract an enum when mixed quotes are used', () => {
    const extracted = extractStringEnum('Can be `x"` or "`y"')!;

    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(2);
    expect(extracted.map(({ value }) => value)).toEqual(['x"', '`y']);
  });
});

describe('deprecated wrappers', () => {
  // A value wrapped in markdown strikethrough (`~~`) is expected to be extracted
  // without the surrounding `~` characters.
  it('should handle strikethrough deprecation wrappers', () => {
    const extracted = extractStringEnum('Can be `x` or ~~`y`~~')!;

    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(2);
    expect(extracted.map(({ value }) => value)).toEqual(['x', 'y']);
  });
});

describe('lead-in descriptions', () => {
  // Every case in this group expects exactly the values `x` and `y`; only the
  // punctuation that leads from the value list into the trailing prose differs.
  const expectXAndY = (text: string) => {
    const extracted = extractStringEnum(text)!;
    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(2);
    expect(extracted.map(({ value }) => value)).toEqual(['x', 'y']);
  };

  it('should handle value lists that smoothly lead in to prose with a comma', () => {
    expectXAndY('Can be `x` or `y`, where `x` implies that...');
  });

  it('should handle value lists that smoothly lead in to prose with a fullstop', () => {
    expectXAndY('Can be `x` or `y`. The `x` value implies that...');
  });

  it('should handle value lists that smoothly lead in to prose with a semicolon', () => {
    expectXAndY('Can be `x` or `y`; the `x` value implies that...');
  });

  it('should handle value lists that smoothly lead in to prose with a hyphen', () => {
    expectXAndY('Can be `x` or `y` - the `x` value implies that...');
  });
});

describe('with backticks', () => {
it('should extract an enum of the format "can be x"', () => {
const values = extractStringEnum('Can be `x`')!;
Expand Down Expand Up @@ -260,6 +330,65 @@ def fn():
});
});

describe('with double quotes', () => {
  // Runs the extractor, checks that a non-null result of the expected size came
  // back, and returns just the extracted value strings for comparison.
  const valuesFrom = (text: string, expectedCount: number) => {
    const extracted = extractStringEnum(text)!;
    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(expectedCount);
    return extracted.map(({ value }) => value);
  };

  it('should extract an enum of the format "can be x"', () => {
    expect(valuesFrom(`Can be "x"`, 1)).toEqual(['x']);
  });

  it('should extract an enum of the format "can be x or y"', () => {
    expect(valuesFrom(`Can be "x" or "y"`, 2)).toEqual(['x', 'y']);
  });

  it('should extract an enum of the format "can be x, y or z"', () => {
    expect(valuesFrom(`Can be "x", "y" or "z"`, 3)).toEqual(['x', 'y', 'z']);
  });

  it('should extract an enum of the format "can be x, y, or z"', () => {
    expect(valuesFrom(`Can be "x", "y", or "z"`, 3)).toEqual(['x', 'y', 'z']);
  });

  it('should extract an enum of the format "values include a', () => {
    expect(valuesFrom(`Values include "a"`, 1)).toEqual(['a']);
  });

  it('should extract an enum of the format "values include a and b', () => {
    expect(valuesFrom(`Values include "a" and "b"`, 2)).toEqual(['a', 'b']);
  });

  it('should extract an enum of the format "values include a, b and c', () => {
    expect(valuesFrom(`Values include "a", "b" and "c"`, 3)).toEqual(['a', 'b', 'c']);
  });
});

describe('rawTypeToTypeInformation()', () => {
it('should map a primitive types correctly', () => {
expect(rawTypeToTypeInformation('Boolean', '', null)).toMatchSnapshot();
Expand Down
180 changes: 163 additions & 17 deletions src/markdown-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -453,28 +453,174 @@ export enum StripReturnTypeBehavior {
DO_NOT_STRIP,
}

// All possible value separators, sorted by reverse length to ensure
// that we match the longer comma prefix variants first if they are present
const niceSeparators = [',', 'and', 'or', ', and', ', or'].sort((a, b) => b.length - a.length);
// Some string enums can also be objects, the final phrase is "or an object" and we
// should gracefully terminate in that case
const niceTerminators = [', or an Object', 'or an Object'].sort((a, b) => b.length - a.length);
// Suffixes that may follow a value; they are skipped over and the parser keeps
// waiting for a separator afterwards
const suffixesToIgnore = ['(Deprecated)'];

/**
 * Extracts an inline list of quoted string values from a description, for
 * example "Can be `x`, `y` or `z`" or 'Values include "a" and "b"'.
 *
 * The text following the first "can be" / "value(s) include(s)" phrase is walked
 * one character at a time by a small state machine:
 *  - a backtick, single quote or double quote opens a value, which runs until the
 *    same quote character is seen again;
 *  - `~` characters in front of a value (markdown strikethrough) must be mirrored
 *    by the same number of `~` characters after it;
 *  - values must be separated by one of `niceSeparators`;
 *  - `.`, `;` or `-` after a value, one of `niceTerminators`, or prose following a
 *    lone comma ends the list gracefully.
 *
 * @param description The free-form description to inspect.
 * @returns One entry per extracted value (each with an empty description), or
 *   `null` when the locator phrase is absent, when the very first character after
 *   it cannot start a value, or when no values were found.
 * @throws Error when the value list is malformed: an unexpected separator, a
 *   mismatched wrapper character, an unexpected token, or an unterminated quote.
 */
export const extractStringEnum = (description: string): PossibleStringValue[] | null => {
  const inlineValuesLocatorPattern = /(?:can be|values? includes?) (.+)/i;
  const locatorMatch = inlineValuesLocatorPattern.exec(description);
  if (!locatorMatch) return null;

  const valuesTokens = locatorMatch[1].split('');

  const state = {
    // Where are we in the valuesTokens array
    position: 0,
    // What values have we found so far
    values: [] as string[],
    // The current value we are building, was found wrapped by `currentQuoter`
    currentValue: '',
    // The quote character that we encountered to start building a value
    // We won't stop adding characters to `currentValue` until the same character
    // is encountered again
    currentQuoter: null as null | string,
    // In some cases quoted values are wrapped with other markdown indicators, for
    // instance strikethrough ~ characters. This handles those to ensure anything
    // we allow as a wrapping character is unwrapped after a value is extracted.
    currentQuoterWrappers: [] as string[],
    // This is set to true after a value is extracted to allow us to parse out a
    // nice separator. For instance a "comma", a complete list is in `niceSeparators`
    // above.
    expectingNiceSeparator: false,
    // This is set after the state machine reaches a point that _could_ be the end,
    // an invalid token when this is set to true is not a fatal error rather the
    // graceful termination of the state machine.
    couldBeDone: false,
  };

  // Returns `length` characters starting at the character currently being
  // processed. `state.position` has already been advanced past that character,
  // hence the `- 1` on both bounds.
  const lookAhead = (length: number) => {
    return valuesTokens.slice(state.position - 1, state.position + length - 1).join('');
  };

  stringEnumTokenLoop: while (state.position < valuesTokens.length) {
    const char = valuesTokens[state.position];
    state.position++;

    if (state.currentQuoter) {
      // We should never expect a separator inside a quoted value
      if (state.expectingNiceSeparator) {
        throw new Error('Impossible state encountered while extracting a string enum');
      }
      if (char === state.currentQuoter) {
        // Closing quote: the value is complete, a separator must come next
        state.currentQuoter = null;
        state.values.push(state.currentValue);
        state.currentValue = '';
        state.expectingNiceSeparator = true;
      } else {
        state.currentValue += char;
      }
    } else {
      // Whitespace can be skipped
      if (char === ' ') {
        continue stringEnumTokenLoop;
      }

      // If we're between values we should be expecting one of the above "nice"
      // separators.
      if (state.expectingNiceSeparator) {
        // Before checking for a separator we need to ensure we have unwrapped any wrapping
        // chars
        if (state.currentQuoterWrappers.length) {
          const expectedUnwrap = state.currentQuoterWrappers.pop();
          if (char !== expectedUnwrap) {
            // The context line starts with `Context: "` (10 characters) and the
            // offending character sits at index `position - 1`, so the caret
            // needs `9 + position` leading spaces.
            throw new Error(
              `Unexpected token while extracting string enum. Expected an unwrapping token that matched "${expectedUnwrap}". But found token: ${char}\nContext: "${locatorMatch[1]}"\n${' '.repeat(
                9 + state.position,
              )}^`,
            );
          }
          continue stringEnumTokenLoop;
        }

        // Sentence punctuation after a value means the list has ended and prose follows
        if (char === '.' || char === ';' || char === '-') {
          break stringEnumTokenLoop;
        }

        for (const suffix of suffixesToIgnore) {
          if (lookAhead(suffix.length) === suffix) {
            // Skip the rest of the suffix; a separator is still expected afterwards
            state.position += suffix.length - 1;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceTerminator of niceTerminators) {
          if (lookAhead(niceTerminator.length) === niceTerminator) {
            state.position += niceTerminator.length - 1;
            state.expectingNiceSeparator = false;
            state.couldBeDone = true;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceSeparator of niceSeparators) {
          if (lookAhead(niceSeparator.length) === niceSeparator) {
            state.position += niceSeparator.length - 1;
            state.expectingNiceSeparator = false;
            // A lone comma may lead into prose rather than into another value
            if (niceSeparator === ',') {
              state.couldBeDone = true;
            }
            continue stringEnumTokenLoop;
          }
        }

        // The context line starts with `Context: ` (9 characters, no quote)
        throw new Error(
          `Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "${char}"\nContext: ${locatorMatch[1]}\n${' '.repeat(
            8 + state.position,
          )}^`,
        );
      }

      if (['"', "'", '`'].includes(char)) {
        // Quote chars start a new value
        state.currentQuoter = char;
        // A new value has started, we no longer could be done on an invalid char
        state.couldBeDone = false;
        continue stringEnumTokenLoop;
      }
      if (['~'].includes(char)) {
        // Deprecated string enum values are wrapped with strikethrough
        state.currentQuoterWrappers.push(char);
        continue stringEnumTokenLoop;
      }
      // If we are at the very start we should just assume our heuristic found something silly
      // and bail, 0 valid characters is skip-able
      if (state.position === 1) {
        return null;
      }
      // If the last thing we parsed _could_ have been a termination character
      // let's assume an invalid character here confirms that.
      if (state.couldBeDone) {
        break stringEnumTokenLoop;
      }
      // Anything else is unexpected
      throw new Error(
        `Unexpected token while extracting string enum. Token: ${char}\nContext: "${locatorMatch[1]}"\n${' '.repeat(
          9 + state.position,
        )}^`,
      );
    }
  }

  // Reached the end of the description, we should check
  // if we are in a clean state (not inside a quote).
  // If so we're good, if not hard error
  if (state.currentQuoter || state.currentValue) {
    throw new Error(
      `Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?\nContext: ${locatorMatch[1]}`,
    );
  }

  // No options we should just bail, can't have a string enum with 0 options
  if (!state.values.length) {
    return null;
  }

  return state.values.map(value => ({
    value,
    description: '',
  }));
};

export const extractReturnType = (
Expand Down