Skip to content

Commit

Permalink
Fix: #3073 escaping in strings (#3082)
Browse files Browse the repository at this point in the history
* chore: refactor parsing strings to not rely on `JSON.parse`

* fix: #3073 function `format` not escaping control characters and double quotes in strings

* chore: add more unit tests
  • Loading branch information
josdejong authored Oct 25, 2023
1 parent 4a26b06 commit d1c4a56
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 97 deletions.
125 changes: 49 additions & 76 deletions src/expression/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,19 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({
'Infinity'
]

const ESCAPE_CHARACTERS = {
'"': '"',
"'": "'",
'\\': '\\',
'/': '/',
b: '\b',
f: '\f',
n: '\n',
r: '\r',
t: '\t'
// note that \u is handled separately in parseStringToken()
}

function initialState () {
return {
extraNodes: {}, // current extra nodes, must be careful not to mutate
Expand Down Expand Up @@ -1339,7 +1352,7 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({
return node
}

return parseDoubleQuotesString(state)
return parseString(state)
}

/**
Expand Down Expand Up @@ -1436,15 +1449,15 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({
}

/**
* Parse a double quotes string.
* Parse a single or double quoted string.
* @return {Node} node
* @private
*/
function parseDoubleQuotesString (state) {
function parseString (state) {
let node, str

if (state.token === '"') {
str = parseDoubleQuotesStringToken(state)
if (state.token === '"' || state.token === "'") {
str = parseStringToken(state, state.token)

// create constant
node = new ConstantNode(str)
Expand All @@ -1455,92 +1468,54 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({
return node
}

return parseSingleQuotesString(state)
return parseMatrix(state)
}

/**
* Parse a string surrounded by double quotes "..."
* Parse a string surrounded by single or double quotes
* @param {Object} state
* @param {"'" | "\""} quote
* @return {string}
*/
function parseDoubleQuotesStringToken (state) {
function parseStringToken (state, quote) {
let str = ''

while (currentCharacter(state) !== '' && currentCharacter(state) !== '"') {
while (currentCharacter(state) !== '' && currentCharacter(state) !== quote) {
if (currentCharacter(state) === '\\') {
// escape character, immediately process the next
// character to prevent stopping at a next '\"'
const cNext = nextCharacter(state)
if (cNext !== "'") {
str += currentCharacter(state)
}
next(state)
}

str += currentCharacter(state)
next(state)
}

getToken(state)
if (state.token !== '"') {
throw createSyntaxError(state, 'End of string " expected')
}
getToken(state)

return JSON.parse('"' + str + '"') // unescape escaped characters
}

/**
* Parse a single quotes string.
* @return {Node} node
* @private
*/
function parseSingleQuotesString (state) {
let node, str

if (state.token === '\'') {
str = parseSingleQuotesStringToken(state)

// create constant
node = new ConstantNode(str)

// parse index parameters
node = parseAccessors(state, node)

return node
}

return parseMatrix(state)
}

/**
* Parse a string surrounded by single quotes '...'
* @return {string}
*/
function parseSingleQuotesStringToken (state) {
let str = ''

while (currentCharacter(state) !== '' && currentCharacter(state) !== '\'') {
if (currentCharacter(state) === '\\') {
// escape character, immediately process the next
// character to prevent stopping at a next '\''
const cNext = nextCharacter(state)
if (cNext !== "'" && cNext !== '"') {
str += currentCharacter(state)
const char = currentCharacter(state)
const escapeChar = ESCAPE_CHARACTERS[char]
if (escapeChar !== undefined) {
// an escaped control character like \" or \n
str += escapeChar
state.index += 1
} else if (char === 'u') {
// escaped unicode character
const unicode = state.expression.slice(state.index + 1, state.index + 5)
if (/^[0-9A-Fa-f]{4}$/.test(unicode)) { // test whether the string holds four hexadecimal values
str += String.fromCharCode(parseInt(unicode, 16))
state.index += 5
} else {
throw createSyntaxError(state, `Invalid unicode character \\u${unicode}`)
}
} else {
throw createSyntaxError(state, `Bad escape character \\${char}`)
}
} else {
// any regular character
str += currentCharacter(state)
next(state)
}

str += currentCharacter(state)
next(state)
}

getToken(state)
if (state.token !== '\'') {
throw createSyntaxError(state, 'End of string \' expected')
if (state.token !== quote) {
throw createSyntaxError(state, `End of string ${quote} expected`)
}
getToken(state)

return JSON.parse('"' + str.replace(/"/g, '\\"') + '"') // unescape escaped characters
return str
}

/**
Expand Down Expand Up @@ -1647,10 +1622,8 @@ export const createParse = /* #__PURE__ */ factory(name, dependencies, ({

if (state.token !== '}') {
// parse key
if (state.token === '"') {
key = parseDoubleQuotesStringToken(state)
} else if (state.token === '\'') {
key = parseSingleQuotesStringToken(state)
if (state.token === '"' || state.token === "'") {
key = parseStringToken(state, state.token)
} else if (state.tokenType === TOKENTYPE.SYMBOL || (state.tokenType === TOKENTYPE.DELIMITER && state.token in NAMED_DELIMITERS)) {
key = state.token
getToken(state)
Expand Down
32 changes: 14 additions & 18 deletions src/utils/string.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ function _format (value, options) {
}

if (isString(value)) {
return '"' + value + '"'
return stringify(value)
}

if (typeof value === 'function') {
Expand All @@ -101,7 +101,7 @@ function _format (value, options) {
return value.toString(options)
} else {
const entries = Object.keys(value).map(key => {
return '"' + key + '": ' + format(value[key], options)
return stringify(key) + ': ' + format(value[key], options)
})

return '{' + entries.join(', ') + '}'
Expand All @@ -122,28 +122,24 @@ export function stringify (value) {
let escaped = ''
let i = 0
while (i < text.length) {
let c = text.charAt(i)

if (c === '\\') {
escaped += c
i++

c = text.charAt(i)
if (c === '' || '"\\/bfnrtu'.indexOf(c) === -1) {
escaped += '\\' // no valid escape character -> escape it
}
escaped += c
} else if (c === '"') {
escaped += '\\"'
} else {
escaped += c
}
const c = text.charAt(i)
escaped += (c in controlCharacters) ? controlCharacters[c] : c
i++
}

return '"' + escaped + '"'
}

const controlCharacters = {
'"': '\\"',
'\\': '\\\\',
'\b': '\\b',
'\f': '\\f',
'\n': '\\n',
'\r': '\\r',
'\t': '\\t'
}

/**
* Escape special HTML characters
* @param {*} value
Expand Down
3 changes: 2 additions & 1 deletion test/unit-tests/expression/node/ConstantNode.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ describe('ConstantNode', function () {
assert.deepStrictEqual(new ConstantNode(math.bignumber('1e500')).toString(), '1e+500')
assert.deepStrictEqual(new ConstantNode(math.fraction(2, 3)).toString(), '2/3')
assert.strictEqual(new ConstantNode('hi').toString(), '"hi"')
assert.strictEqual(new ConstantNode('with " double quote').toString(), '"with \\" double quote"')
assert.strictEqual(new ConstantNode(true).toString(), 'true')
assert.strictEqual(new ConstantNode(false).toString(), 'false')
assert.strictEqual(new ConstantNode(undefined).toString(), 'undefined')
Expand Down Expand Up @@ -218,6 +219,6 @@ describe('ConstantNode', function () {
it('should escape strings in toTex', function () {
const n = new ConstantNode('space tab\tunderscore_bla$/')

assert.strictEqual(n.toTex(), '\\mathtt{"space~tab\\qquad{}underscore\\_bla\\$/"}')
assert.strictEqual(n.toTex(), '\\mathtt{"space~tab\\textbackslash{}tunderscore\\_bla\\$/"}')
})
})
24 changes: 22 additions & 2 deletions test/unit-tests/expression/parse.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ describe('parse', function () {
assert.deepStrictEqual(parseAndEval(' "hi" '), 'hi')
})

it('should parse a string containing quotes', function () {
it('should parse a string containing escape characters', function () {
// quote
assert.deepStrictEqual(parseAndEval('"with\'quote"'), "with'quote")

Expand All @@ -386,6 +386,26 @@ describe('parse', function () {
assert.deepStrictEqual(parseAndEval('"escaped backslash\\\\"'), 'escaped backslash\\')
})

it('should parse unicode characters', function () {
assert.deepStrictEqual(parseAndEval('"★"'), '★')
assert.deepStrictEqual(parseAndEval('"😀"'), '😀')
assert.deepStrictEqual(parseAndEval('"\ud83d\ude00"'), '\ud83d\ude00')

assert.deepStrictEqual(parseAndEval('"\\ud83d\\ude00"'), '😀')
assert.deepStrictEqual(parseAndEval('"\\u2140"'), '⅀')
assert.deepStrictEqual(parseAndEval('"\\u221B"'), '∛')
})

it('should throw an error on an invalid unicode character', function () {
assert.throws(() => parseAndEval('"\\ud8'), /Invalid unicode character \\ud8/)
assert.throws(() => parseAndEval('"\\ud8TT'), /Invalid unicode character \\ud8TT/)
})

it('should throw an error on an invalid escape character', function () {
assert.throws(() => parseAndEval('"\\y'), /Bad escape character \\y/)
assert.throws(() => parseAndEval('"\\v'), /Bad escape character \\v/)
})

it('should throw an error with invalid strings', function () {
assert.throws(function () { parseAndEval('"hi') }, SyntaxError)
assert.throws(function () { parseAndEval(' hi" ') }, Error)
Expand Down Expand Up @@ -433,7 +453,7 @@ describe('parse', function () {
assert.deepStrictEqual(parseAndEval(' \'hi\' '), 'hi')
})

it('should parse a string containing quotes', function () {
it('should parse a string containing escape characters', function () {
// quote
assert.deepStrictEqual(parseAndEval("'with\"quote'"), 'with"quote')

Expand Down
15 changes: 15 additions & 0 deletions test/unit-tests/utils/string.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,16 @@ describe('string', function () {
assert.strictEqual(format('string'), '"string"')
})

it('should format a string with escape characters', function () {
assert.strictEqual(format('with " double quote'), '"with \\" double quote"')
assert.strictEqual(format('with \\ backslash'), '"with \\\\ backslash"')
assert.strictEqual(format('with \b'), '"with \\b"')
assert.strictEqual(format('with \f'), '"with \\f"')
assert.strictEqual(format('with \n newline'), '"with \\n newline"')
assert.strictEqual(format('with \r'), '"with \\r"')
assert.strictEqual(format('with \t tab'), '"with \\t tab"')
})

it('should format an object', function () {
const obj = {
a: 1.1111,
Expand All @@ -81,6 +91,11 @@ describe('string', function () {
assert.strictEqual(format(obj, 3), '{"a": 1.11, "b": 2.22 + 3i}')
})

it('should format an object with escape characters', function () {
assert.strictEqual(format({ 'with " double quote': 42 }), '{"with \\" double quote": 42}')
assert.strictEqual(format({ 'with \\ backslash': 42 }), '{"with \\\\ backslash": 42}')
})

it('should format an object with its own format function', function () {
const obj = {
format: function (options) {
Expand Down

0 comments on commit d1c4a56

Please sign in to comment.