Skip to content

Commit f4bf68b

Browse files
committed
Support UXXXXXX escape sequences
1 parent 1292cca commit f4bf68b

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

fluent.syntax/src/main/kotlin/org/projectfluent/syntax/processor/Processor.kt

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.projectfluent.syntax.processor
22

33
import org.projectfluent.syntax.ast.* // ktlint-disable no-wildcard-imports
44
import java.lang.Exception
5+
import java.lang.StringBuilder
56

67
/**
78
* Process patterns by returning new patterns with elements transformed.
@@ -147,16 +148,34 @@ class Processor {
147148
}
148149

149150
/**
 * Matches a single escape sequence inside a string literal: a backslash
 * followed by either a backslash or double quote (group 2), `u` plus exactly
 * four hex digits (group 3), or `U` plus exactly six hex digits (group 4).
 * Group 1 wraps whichever alternative matched.
 */
private val special =
    """\\(([\\"])|(u[0-9a-fA-F]{4})|(U[0-9a-fA-F]{6}))""".toRegex()
151152

152153
/**
 * Convert one escape sequence matched by [special] into the text it stands for.
 *
 * - An escaped backslash or double quote yields that character.
 * - `\uXXXX` and `\UXXXXXX` yield the code point written in hex. Supplementary
 *   code points come back as a surrogate pair; BMP code points as a single
 *   char, regardless of which of the two escape forms was used.
 * - A six-digit escape above U+10FFFF is not a code point at all and yields
 *   U+FFFD REPLACEMENT CHARACTER instead of aborting the whole conversion.
 *
 * @param matchResult a match whose groups 2, 3 and 4 hold the simple,
 *   four-digit and six-digit alternatives respectively (empty when unmatched).
 * @return the unescaped text.
 * @throws Exception if none of the three alternatives matched, which cannot
 *   happen for a match produced by [special].
 */
private fun unescape(matchResult: MatchResult): CharSequence {
    // Index 0 is the whole match and index 1 the outer group; skip both.
    val (simple, uni4, uni6) = matchResult.groupValues.drop(2)
    if (simple.isNotEmpty()) {
        return simple
    }

    val escape = when {
        uni4.isNotEmpty() -> uni4
        uni6.isNotEmpty() -> uni6
        else -> throw Exception("Unexpected")
    }
    // Drop the leading 'u' / 'U' marker and parse the hex digits.
    val codepoint = escape.substring(1).toInt(16)
    if (!Character.isValidCodePoint(codepoint)) {
        return "\uFFFD"
    }
    // toChars emits one char for BMP values and a surrogate pair otherwise;
    // calling highSurrogate/lowSurrogate directly is only correct for the latter.
    return String(Character.toChars(codepoint))
}
162181
}

fluent.syntax/src/test/kotlin/org/projectfluent/syntax/processor/ProcessorTest.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,22 @@ internal class ProcessorTest {
6565
processor.unescapeLiteralsToText(pattern)
6666
)
6767

68+
pattern.elements.clear()
69+
pattern.elements.addAll(
70+
arrayOf(
71+
TextElement("Emoji: "),
72+
Placeable(expression = StringLiteral("""\U01f602"""))
73+
)
74+
)
75+
assertEquals(
76+
Pattern(TextElement("Emoji: \uD83D\uDE02")),
77+
processor.unescapeLiteralsToText(pattern)
78+
)
79+
assertEquals(
80+
Pattern(TextElement("Emoji: 😂")),
81+
processor.unescapeLiteralsToText(pattern)
82+
)
83+
6884
pattern.elements.clear()
6985
pattern.elements.addAll(
7086
arrayOf(

0 commit comments

Comments
 (0)