From d1c4a56a8e6fd978608f347c5984590f961298af Mon Sep 17 00:00:00 2001 From: Jos de Jong Date: Wed, 25 Oct 2023 13:37:53 +0200 Subject: [PATCH] Fix: #3073 escaping in strings (#3082) * chore: refactor parsing strings to not rely on `JSON.parse` * fix: #3073 function `format` not escaping control characters and double quotes in strings * chore: add more unit tests --- src/expression/parse.js | 125 +++++++----------- src/utils/string.js | 32 ++--- .../expression/node/ConstantNode.test.js | 3 +- test/unit-tests/expression/parse.test.js | 24 +++- test/unit-tests/utils/string.test.js | 15 +++ 5 files changed, 102 insertions(+), 97 deletions(-) diff --git a/src/expression/parse.js b/src/expression/parse.js index f0e2cdfcb8..45df38c985 100644 --- a/src/expression/parse.js +++ b/src/expression/parse.js @@ -185,6 +185,19 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({ 'Infinity' ] + const ESCAPE_CHARACTERS = { + '"': '"', + "'": "'", + '\\': '\\', + '/': '/', + b: '\b', + f: '\f', + n: '\n', + r: '\r', + t: '\t' + // note that \u is handled separately in parseStringToken() + } + function initialState () { return { extraNodes: {}, // current extra nodes, must be careful not to mutate @@ -1339,7 +1352,7 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({ return node } - return parseDoubleQuotesString(state) + return parseString(state) } /** @@ -1436,15 +1449,15 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({ } /** - * Parse a double quotes string. + * Parse a single or double quoted string. * @return {Node} node * @private */ - function parseDoubleQuotesString (state) { + function parseString (state) { let node, str - if (state.token === '"') { - str = parseDoubleQuotesStringToken(state) + if (state.token === '"' || state.token === "'") { + str = parseStringToken(state, state.token) // create constant node = new ConstantNode(str) @@ -1455,92 +1468,54 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({ return node } - return parseSingleQuotesString(state) + return parseMatrix(state) } /** - * Parse a string surrounded by double quotes "..." + * Parse a string surrounded by single or double quotes + * @param {Object} state + * @param {"'" | "\""} quote * @return {string} */ - function parseDoubleQuotesStringToken (state) { + function parseStringToken (state, quote) { let str = '' - while (currentCharacter(state) !== '' && currentCharacter(state) !== '"') { + while (currentCharacter(state) !== '' && currentCharacter(state) !== quote) { if (currentCharacter(state) === '\\') { - // escape character, immediately process the next - // character to prevent stopping at a next '\"' - const cNext = nextCharacter(state) - if (cNext !== "'") { - str += currentCharacter(state) - } next(state) - } - - str += currentCharacter(state) - next(state) - } - - getToken(state) - if (state.token !== '"') { - throw createSyntaxError(state, 'End of string " expected') - } - getToken(state) - - return JSON.parse('"' + str + '"') // unescape escaped characters - } - - /** - * Parse a single quotes string. - * @return {Node} node - * @private - */ - function parseSingleQuotesString (state) { - let node, str - - if (state.token === '\'') { - str = parseSingleQuotesStringToken(state) - - // create constant - node = new ConstantNode(str) - - // parse index parameters - node = parseAccessors(state, node) - - return node - } - - return parseMatrix(state) - } - /** - * Parse a string surrounded by single quotes '...' - * @return {string} - */ - function parseSingleQuotesStringToken (state) { - let str = '' - - while (currentCharacter(state) !== '' && currentCharacter(state) !== '\'') { - if (currentCharacter(state) === '\\') { - // escape character, immediately process the next - // character to prevent stopping at a next '\'' - const cNext = nextCharacter(state) - if (cNext !== "'" && cNext !== '"') { - str += currentCharacter(state) + const char = currentCharacter(state) + const escapeChar = ESCAPE_CHARACTERS[char] + if (escapeChar !== undefined) { + // an escaped control character like \" or \n + str += escapeChar + state.index += 1 + } else if (char === 'u') { + // escaped unicode character + const unicode = state.expression.slice(state.index + 1, state.index + 5) + if (/^[0-9A-Fa-f]{4}$/.test(unicode)) { // test whether the string holds four hexadecimal values + str += String.fromCharCode(parseInt(unicode, 16)) + state.index += 5 + } else { + throw createSyntaxError(state, `Invalid unicode character \\u${unicode}`) + } + } else { + throw createSyntaxError(state, `Bad escape character \\${char}`) } + } else { + // any regular character + str += currentCharacter(state) next(state) } - - str += currentCharacter(state) - next(state) } getToken(state) - if (state.token !== '\'') { - throw createSyntaxError(state, 'End of string \' expected') + if (state.token !== quote) { + throw createSyntaxError(state, `End of string ${quote} expected`) } getToken(state) - return JSON.parse('"' + str.replace(/"/g, '\\"') + '"') // unescape escaped characters + return str } /** @@ -1647,10 +1622,8 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({ if (state.token !== '}') { // parse key - if (state.token === '"') { - key = parseDoubleQuotesStringToken(state) - } else if (state.token === '\'') { - key = parseSingleQuotesStringToken(state) + if (state.token === '"' || state.token === "'") { + key = parseStringToken(state, state.token) } else if (state.tokenType === TOKENTYPE.SYMBOL || (state.tokenType === TOKENTYPE.DELIMITER && state.token in NAMED_DELIMITERS)) { key = state.token getToken(state) diff --git a/src/utils/string.js b/src/utils/string.js index 558a5da298..01cafa309a 100644 --- a/src/utils/string.js +++ b/src/utils/string.js @@ -86,7 +86,7 @@ function _format (value, options) { } if (isString(value)) { - return '"' + value + '"' + return stringify(value) } if (typeof value === 'function') { @@ -101,7 +101,7 @@ function _format (value, options) { return value.toString(options) } else { const entries = Object.keys(value).map(key => { - return '"' + key + '": ' + format(value[key], options) + return stringify(key) + ': ' + format(value[key], options) }) return '{' + entries.join(', ') + '}' @@ -122,28 +122,24 @@ export function stringify (value) { let escaped = '' let i = 0 while (i < text.length) { - let c = text.charAt(i) - - if (c === '\\') { - escaped += c - i++ - - c = text.charAt(i) - if (c === '' || '"\\/bfnrtu'.indexOf(c) === -1) { - escaped += '\\' // no valid escape character -> escape it - } - escaped += c - } else if (c === '"') { - escaped += '\\"' - } else { - escaped += c - } + const c = text.charAt(i) + escaped += (c in controlCharacters) ? controlCharacters[c] : c i++ } return '"' + escaped + '"' } +const controlCharacters = { + '"': '\\"', + '\\': '\\\\', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t' +} + /** * Escape special HTML characters * @param {*} value diff --git a/test/unit-tests/expression/node/ConstantNode.test.js b/test/unit-tests/expression/node/ConstantNode.test.js index 4a6cef45fd..358c94a548 100644 --- a/test/unit-tests/expression/node/ConstantNode.test.js +++ b/test/unit-tests/expression/node/ConstantNode.test.js @@ -138,6 +138,7 @@ describe('ConstantNode', function () { assert.deepStrictEqual(new ConstantNode(math.bignumber('1e500')).toString(), '1e+500') assert.deepStrictEqual(new ConstantNode(math.fraction(2, 3)).toString(), '2/3') assert.strictEqual(new ConstantNode('hi').toString(), '"hi"') + assert.strictEqual(new ConstantNode('with " double quote').toString(), '"with \\" double quote"') assert.strictEqual(new ConstantNode(true).toString(), 'true') assert.strictEqual(new ConstantNode(false).toString(), 'false') assert.strictEqual(new ConstantNode(undefined).toString(), 'undefined') @@ -218,6 +219,6 @@ describe('ConstantNode', function () { it('should escape strings in toTex', function () { const n = new ConstantNode('space tab\tunderscore_bla$/') - assert.strictEqual(n.toTex(), '\\mathtt{"space~tab\\qquad{}underscore\\_bla\\$/"}') + assert.strictEqual(n.toTex(), '\\mathtt{"space~tab\\textbackslash{}tunderscore\\_bla\\$/"}') }) }) diff --git a/test/unit-tests/expression/parse.test.js b/test/unit-tests/expression/parse.test.js index c5b909f804..9af98d4a44 100644 --- a/test/unit-tests/expression/parse.test.js +++ b/test/unit-tests/expression/parse.test.js @@ -364,7 +364,7 @@ describe('parse', function () { assert.deepStrictEqual(parseAndEval(' "hi" '), 'hi') }) - it('should parse a string containing quotes', function () { + it('should parse a string containing escape characters', function () { // quote assert.deepStrictEqual(parseAndEval('"with\'quote"'), "with'quote") @@ -386,6 +386,26 @@ describe('parse', function () { assert.deepStrictEqual(parseAndEval('"escaped backslash\\\\"'), 'escaped backslash\\') }) + it('should parse unicode characters', function () { + assert.deepStrictEqual(parseAndEval('"★"'), '★') + assert.deepStrictEqual(parseAndEval('"😀"'), '😀') + assert.deepStrictEqual(parseAndEval('"\ud83d\ude00"'), '\ud83d\ude00') + + assert.deepStrictEqual(parseAndEval('"\\ud83d\\ude00"'), '😀') + assert.deepStrictEqual(parseAndEval('"\\u2140"'), '⅀') + assert.deepStrictEqual(parseAndEval('"\\u221B"'), '∛') + }) + + it('should throw an error on an invalid unicode character', function () { + assert.throws(() => parseAndEval('"\\ud8'), /Invalid unicode character \\ud8/) + assert.throws(() => parseAndEval('"\\ud8TT'), /Invalid unicode character \\ud8TT/) + }) + + it('should throw an error on an invalid escape character', function () { + assert.throws(() => parseAndEval('"\\y'), /Bad escape character \\y/) + assert.throws(() => parseAndEval('"\\v'), /Bad escape character \\v/) + }) + it('should throw an error with invalid strings', function () { assert.throws(function () { parseAndEval('"hi') }, SyntaxError) assert.throws(function () { parseAndEval(' hi" ') }, Error) @@ -433,7 +453,7 @@ describe('parse', function () { assert.deepStrictEqual(parseAndEval(' \'hi\' '), 'hi') }) - it('should parse a string containing quotes', function () { + it('should parse a string containing escape characters', function () { // quote assert.deepStrictEqual(parseAndEval("'with\"quote'"), 'with"quote') diff --git a/test/unit-tests/utils/string.test.js b/test/unit-tests/utils/string.test.js index 0dbe952933..694821983f 100644 --- a/test/unit-tests/utils/string.test.js +++ b/test/unit-tests/utils/string.test.js @@ -71,6 +71,16 @@ describe('string', function () { assert.strictEqual(format('string'), '"string"') }) + it('should format a string with escape characters', function () { + assert.strictEqual(format('with " double quote'), '"with \\" double quote"') + assert.strictEqual(format('with \\ backslash'), '"with \\\\ backslash"') + assert.strictEqual(format('with \b'), '"with \\b"') + assert.strictEqual(format('with \f'), '"with \\f"') + assert.strictEqual(format('with \n newline'), '"with \\n newline"') + assert.strictEqual(format('with \r'), '"with \\r"') + assert.strictEqual(format('with \t tab'), '"with \\t tab"') + }) + it('should format an object', function () { const obj = { a: 1.1111, @@ -81,6 +91,11 @@ describe('string', function () { assert.strictEqual(format(obj, 3), '{"a": 1.11, "b": 2.22 + 3i}') }) + it('should format an object with escape characters', function () { + assert.strictEqual(format({ 'with " double quote': 42 }), '{"with \\" double quote": 42}') + assert.strictEqual(format({ 'with \\ backslash': 42 }), '{"with \\\\ backslash": 42}') + }) + it('should format an object with its own format function', function () { const obj = { format: function (options) {