Skip to content

Commit 6951d32

Browse files
committed
Treat backslash as normal char in TextElements
1 parent bac1271 commit 6951d32

File tree

4 files changed

+209
-56
lines changed

4 files changed

+209
-56
lines changed

spec/fluent.ebnf

+18-16
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ InlineExpression ::= StringLiteral
5757
| inline_placeable
5858

5959
/* Literals */
60-
StringLiteral ::= quote quoted_text_char* quote
60+
StringLiteral ::= "\"" quoted_text_char* "\""
6161
NumberLiteral ::= "-"? digit+ ("." digit+)?
6262

6363
/* Inline Expressions */
@@ -84,22 +84,24 @@ Identifier ::= [a-zA-Z] [a-zA-Z0-9_-]*
8484
Function ::= [A-Z] [A-Z_?-]*
8585

8686
/* Characters */
87-
backslash ::= "\\"
88-
quote ::= "\""
89-
/* Any Unicode character from BMP excluding C0 control characters, space,
90-
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
91-
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
92-
*/
93-
regular_char ::= [\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}]
94-
text_char ::= blank_inline
95-
| "\u0009"
96-
| /\\u[0-9a-fA-F]{4}/
97-
| (backslash backslash)
98-
| (backslash "{")
99-
| (regular_char - "{" - backslash)
87+
/* Any Unicode character excluding C0 control characters (but including tab),
88+
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
89+
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
90+
regular_char ::= [\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}]
91+
| [\\u{10000}-\\u{10FFFF}]
92+
/* The opening brace in text starts a placeable. */
93+
text_char ::= (regular_char - "{")
94+
| "\u0020"
95+
/* Indented text may not start with characters which mark its end. */
10096
indented_char ::= text_char - "}" - "[" - "*" - "."
101-
quoted_text_char ::= (text_char - quote)
102-
| (backslash quote)
97+
/* Backslash can be used to escape the double quote and the backslash itself.
98+
* The literal opening brace { is allowed because StringLiterals cannot
99+
* contain placeables. \uXXXX Unicode escape sequences are recognized, too. */
100+
quoted_text_char ::= (text_char - "\"" - "\\")
101+
| /\\u[0-9a-fA-F]{4}/
102+
| "{"
103+
| "\\\\"
104+
| "\\\""
103105
digit ::= [0-9]
104106

105107
/* Whitespace */

syntax/grammar.mjs

+24-28
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,9 @@ let InlineExpression = defer(() =>
199199
/* Literals */
200200
let StringLiteral = defer(() =>
201201
sequence(
202-
quote,
202+
string("\""),
203203
repeat(quoted_text_char),
204-
quote)
204+
string("\""))
205205
.map(element_at(1))
206206
.map(join)
207207
.chain(into(FTL.StringLiteral)));
@@ -373,48 +373,44 @@ let Function =
373373
/* ---------- */
374374
/* Characters */
375375

376-
let backslash = string("\\");
377-
let quote = string("\"");
378-
379-
/* Any Unicode character from BMP excluding C0 control characters, space,
380-
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
381-
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
382-
*/
376+
/* Any Unicode character excluding C0 control characters (but including tab),
377+
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
378+
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
383379
let regular_char =
384-
charset("\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}");
380+
either(
381+
charset("\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}"),
382+
charset("\\u{10000}-\\u{10FFFF}"));
385383

386-
let text_char = defer(() =>
384+
/* The opening brace in text starts a placeable. */
385+
let text_char =
387386
either(
388-
blank_inline,
389-
string("\u0009"),
390-
regex(/\\u[0-9a-fA-F]{4}/),
391-
sequence(
392-
backslash,
393-
backslash).map(join),
394-
sequence(
395-
backslash,
396-
string("{")).map(join),
397387
and(
398-
not(backslash),
399388
not(string("{")),
400-
regular_char)));
389+
regular_char),
390+
string("\u0020"));
401391

402-
let indented_char = defer(() =>
392+
/* Indented text may not start with characters which mark its end. */
393+
let indented_char =
403394
and(
404395
not(string(".")),
405396
not(string("*")),
406397
not(string("[")),
407398
not(string("}")),
408-
text_char));
399+
text_char);
409400

401+
/* Backslash can be used to escape the double quote and the backslash itself.
402+
* The literal opening brace { is allowed because StringLiterals cannot
403+
* contain placeables. \uXXXX Unicode escape sequences are recognized, too. */
410404
let quoted_text_char =
411405
either(
412406
and(
413-
not(quote),
407+
not(string("\\")),
408+
not(string("\"")),
414409
text_char),
415-
sequence(
416-
backslash,
417-
quote).map(join));
410+
regex(/\\u[0-9a-fA-F]{4}/),
411+
string("{"),
412+
string("\\\\"),
413+
string("\\\""));
418414

419415
let digit = charset("0-9");
420416

test/fixtures/escaped_characters.ftl

+18-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
1-
backslash = Value with \\ (an escaped backslash)
2-
closing-brace = Value with \{ (a closing brace)
3-
unicode-escape = \u0041
4-
escaped-unicode = \\u0041
1+
## Literal text
2+
text-backslash-one = Value with \ a backslash
3+
text-backslash-two = Value with \\ two backslashes
4+
text-backslash-brace = Value with \{placeable}
5+
text-backslash-u = \u0041
6+
text-backslash-backslash-u = \\u0041
57
6-
## String Expressions
8+
## String literals
79
quote-in-string = {"\""}
810
backslash-in-string = {"\\"}
11+
# ERROR Mismatched quote
912
mismatched-quote = {"\\""}
13+
# ERROR Unknown escape
14+
unknown-escape = {"\x"}
15+
16+
## Unicode escapes
17+
string-unicode-sequence = {"\u0041"}
18+
string-escaped-unicode = {"\\u0041"}
19+
20+
## Literal braces
21+
brace-open = An opening {"{"} brace.
22+
brace-close = A closing } brace.

test/fixtures/escaped_characters.json

+149-7
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
{
22
"type": "Resource",
33
"body": [
4+
{
5+
"type": "GroupComment",
6+
"content": "Literal text"
7+
},
48
{
59
"type": "Message",
610
"id": {
711
"type": "Identifier",
8-
"name": "backslash"
12+
"name": "text-backslash-one"
913
},
1014
"value": {
1115
"type": "Pattern",
1216
"elements": [
1317
{
1418
"type": "TextElement",
15-
"value": "Value with \\\\ (an escaped backslash)"
19+
"value": "Value with \\ a backslash"
1620
}
1721
]
1822
},
@@ -23,14 +27,14 @@
2327
"type": "Message",
2428
"id": {
2529
"type": "Identifier",
26-
"name": "closing-brace"
30+
"name": "text-backslash-two"
2731
},
2832
"value": {
2933
"type": "Pattern",
3034
"elements": [
3135
{
3236
"type": "TextElement",
33-
"value": "Value with \\{ (a closing brace)"
37+
"value": "Value with \\\\ two backslashes"
3438
}
3539
]
3640
},
@@ -41,7 +45,35 @@
4145
"type": "Message",
4246
"id": {
4347
"type": "Identifier",
44-
"name": "unicode-escape"
48+
"name": "text-backslash-brace"
49+
},
50+
"value": {
51+
"type": "Pattern",
52+
"elements": [
53+
{
54+
"type": "TextElement",
55+
"value": "Value with \\"
56+
},
57+
{
58+
"type": "Placeable",
59+
"expression": {
60+
"type": "MessageReference",
61+
"id": {
62+
"type": "Identifier",
63+
"name": "placeable"
64+
}
65+
}
66+
}
67+
]
68+
},
69+
"attributes": [],
70+
"comment": null
71+
},
72+
{
73+
"type": "Message",
74+
"id": {
75+
"type": "Identifier",
76+
"name": "text-backslash-u"
4577
},
4678
"value": {
4779
"type": "Pattern",
@@ -59,7 +91,7 @@
5991
"type": "Message",
6092
"id": {
6193
"type": "Identifier",
62-
"name": "escaped-unicode"
94+
"name": "text-backslash-backslash-u"
6395
},
6496
"value": {
6597
"type": "Pattern",
@@ -75,7 +107,7 @@
75107
},
76108
{
77109
"type": "GroupComment",
78-
"content": "String Expressions"
110+
"content": "String literals"
79111
},
80112
{
81113
"type": "Message",
@@ -119,10 +151,120 @@
119151
"attributes": [],
120152
"comment": null
121153
},
154+
{
155+
"type": "Comment",
156+
"content": "ERROR Mismatched quote"
157+
},
122158
{
123159
"type": "Junk",
124160
"annotations": [],
125161
"content": "mismatched-quote = {\"\\\\\"\"}\n"
162+
},
163+
{
164+
"type": "Comment",
165+
"content": "ERROR Unknown escape"
166+
},
167+
{
168+
"type": "Junk",
169+
"annotations": [],
170+
"content": "unknown-escape = {\"\\x\"}\n"
171+
},
172+
{
173+
"type": "GroupComment",
174+
"content": "Unicode escapes"
175+
},
176+
{
177+
"type": "Message",
178+
"id": {
179+
"type": "Identifier",
180+
"name": "string-unicode-sequence"
181+
},
182+
"value": {
183+
"type": "Pattern",
184+
"elements": [
185+
{
186+
"type": "Placeable",
187+
"expression": {
188+
"type": "StringLiteral",
189+
"value": "\\u0041"
190+
}
191+
}
192+
]
193+
},
194+
"attributes": [],
195+
"comment": null
196+
},
197+
{
198+
"type": "Message",
199+
"id": {
200+
"type": "Identifier",
201+
"name": "string-escaped-unicode"
202+
},
203+
"value": {
204+
"type": "Pattern",
205+
"elements": [
206+
{
207+
"type": "Placeable",
208+
"expression": {
209+
"type": "StringLiteral",
210+
"value": "\\\\u0041"
211+
}
212+
}
213+
]
214+
},
215+
"attributes": [],
216+
"comment": null
217+
},
218+
{
219+
"type": "GroupComment",
220+
"content": "Literal braces"
221+
},
222+
{
223+
"type": "Message",
224+
"id": {
225+
"type": "Identifier",
226+
"name": "brace-open"
227+
},
228+
"value": {
229+
"type": "Pattern",
230+
"elements": [
231+
{
232+
"type": "TextElement",
233+
"value": "An opening "
234+
},
235+
{
236+
"type": "Placeable",
237+
"expression": {
238+
"type": "StringLiteral",
239+
"value": "{"
240+
}
241+
},
242+
{
243+
"type": "TextElement",
244+
"value": " brace."
245+
}
246+
]
247+
},
248+
"attributes": [],
249+
"comment": null
250+
},
251+
{
252+
"type": "Message",
253+
"id": {
254+
"type": "Identifier",
255+
"name": "brace-close"
256+
},
257+
"value": {
258+
"type": "Pattern",
259+
"elements": [
260+
{
261+
"type": "TextElement",
262+
"value": "A closing } brace."
263+
}
264+
]
265+
},
266+
"attributes": [],
267+
"comment": null
126268
}
127269
]
128270
}

0 commit comments

Comments
 (0)