Skip to content

Commit 3657de6

Browse files
authored
Merge pull request #45 from projectfluent/UXXXXXX
Support UXXXXXX escape sequences
2 parents 0325bb2 + eb129e8 commit 3657de6

File tree

2 files changed

+90
-6
lines changed

2 files changed

+90
-6
lines changed

fluent.syntax/src/main/kotlin/org/projectfluent/syntax/processor/Processor.kt

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.projectfluent.syntax.processor
22

33
import org.projectfluent.syntax.ast.* // ktlint-disable no-wildcard-imports
4-
import java.lang.Exception
4+
import java.lang.StringBuilder
55

66
/**
77
* Process patterns by returning new patterns with elements transformed.
@@ -37,7 +37,7 @@ class Processor {
3737
when (element) {
3838
is TextElement -> {
3939
if (lastText == null) {
40-
lastText = element
40+
lastText = TextElement(element.value)
4141
} else {
4242
lastText?.let { it.value += element.value }
4343
}
@@ -147,16 +147,40 @@ class Processor {
147147
}
148148

149149
private val special =
150-
"""\\(([\\"])|(u[0-9a-fA-F]{4}))""".toRegex()
150+
"""\\(([\\"])|(u[0-9a-fA-F]{4})|(U[0-90a-fA-F]{6}))""".toRegex()
151151

152152
private fun unescape(matchResult: MatchResult): CharSequence {
153153
val matches = matchResult.groupValues.drop(2).listIterator()
154154
val simple = matches.next()
155-
if (simple != "") { return simple }
155+
if (simple != "") {
156+
return simple
157+
}
158+
156159
val uni4 = matches.next()
157160
if (uni4 != "") {
158-
return uni4.substring(1).toInt(16).toChar().toString()
161+
val codepoint = uni4.substring(1).toInt(16)
162+
if (Character.isBmpCodePoint(codepoint)) {
163+
val char = codepoint.toChar()
164+
if (!Character.isSurrogate(char)) {
165+
return char.toString()
166+
}
167+
}
159168
}
160-
throw Exception("Unexpected")
169+
170+
val uni6 = matches.next()
171+
if (uni6 != "") {
172+
val codepoint = uni6.substring(1).toInt(16)
173+
if (Character.isValidCodePoint(codepoint)) {
174+
val char = codepoint.toChar()
175+
if (!Character.isSurrogate(char)) {
176+
val builder = StringBuilder()
177+
builder.append(Character.highSurrogate(codepoint))
178+
builder.append(Character.lowSurrogate(codepoint))
179+
return builder
180+
}
181+
}
182+
}
183+
184+
return ""
161185
}
162186
}

fluent.syntax/src/test/kotlin/org/projectfluent/syntax/processor/ProcessorTest.kt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,26 @@ internal class ProcessorTest {
3232
processor.unescapeLiteralsToText(pattern)
3333
)
3434

35+
pattern.elements.clear()
36+
pattern.elements.addAll(
37+
arrayOf(
38+
TextElement("Foo "),
39+
Placeable(expression = StringLiteral("Bar"))
40+
)
41+
)
42+
assertEquals(
43+
Pattern(TextElement("Foo Bar")),
44+
processor.unescapeLiteralsToText(pattern)
45+
)
46+
// The original Pattern isn't modified.
47+
assertEquals(
48+
Pattern(
49+
TextElement("Foo "),
50+
Placeable(expression = StringLiteral("Bar"))
51+
),
52+
pattern
53+
)
54+
3555
pattern.elements.clear()
3656
pattern.elements.addAll(
3757
arrayOf(
@@ -45,6 +65,46 @@ internal class ProcessorTest {
4565
processor.unescapeLiteralsToText(pattern)
4666
)
4767

68+
pattern.elements.clear()
69+
pattern.elements.addAll(
70+
arrayOf(
71+
TextElement("Emoji: "),
72+
Placeable(expression = StringLiteral("""\U01f602"""))
73+
)
74+
)
75+
assertEquals(
76+
Pattern(TextElement("Emoji: \uD83D\uDE02")),
77+
processor.unescapeLiteralsToText(pattern)
78+
)
79+
assertEquals(
80+
Pattern(TextElement("Emoji: 😂")),
81+
processor.unescapeLiteralsToText(pattern)
82+
)
83+
84+
pattern.elements.clear()
85+
pattern.elements.addAll(
86+
arrayOf(
87+
TextElement("Illegal escape sequence: "),
88+
Placeable(expression = StringLiteral("""\ud800"""))
89+
)
90+
)
91+
assertEquals(
92+
Pattern(TextElement("Illegal escape sequence: �")),
93+
processor.unescapeLiteralsToText(pattern)
94+
)
95+
96+
pattern.elements.clear()
97+
pattern.elements.addAll(
98+
arrayOf(
99+
TextElement("Illegal escape sequence: "),
100+
Placeable(expression = StringLiteral("""\U00d800"""))
101+
)
102+
)
103+
assertEquals(
104+
Pattern(TextElement("Illegal escape sequence: �")),
105+
processor.unescapeLiteralsToText(pattern)
106+
)
107+
48108
pattern.elements.clear()
49109
pattern.elements.addAll(
50110
arrayOf(

0 commit comments

Comments
 (0)