diff --git a/tools/cldr-apps/js/src/esm/cldrEscaper.mjs b/tools/cldr-apps/js/src/esm/cldrEscaper.mjs
new file mode 100644
index 00000000000..d02c1702a8c
--- /dev/null
+++ b/tools/cldr-apps/js/src/esm/cldrEscaper.mjs
@@ -0,0 +1,57 @@
+let data = null; // current { escapedCharInfo, forceEscape } pair; replaced wholesale by updateInfo()
+
+const staticInfo = {
+ forceEscapeRegex: "[\\u200e\\u200f\\uFFF0]", // U+FFF0 has no entry in names below, so it gets the hex fallback
+ names: {
+ "\u200e": { name: "LRM" },
+ "\u200f": { name: "RLM" },
+ },
+}; // built-in fallback data, same shape as what updateInfo() accepts - useful for tests
+
+/** Replaces the escaper's data with `escapedCharInfo` and recompiles the regex from its `forceEscapeRegex` string. */
+export function updateInfo(escapedCharInfo) {
+  // Rewrite the incoming pattern into JS "u"-mode syntax: "\ " becomes " ", "\U0001F600" becomes "\u{1F600}"
+  const updatedRegex = escapedCharInfo.forceEscapeRegex
+    .replace(/\\ /g, " ")
+    .replace(/\\U[0]*([0-9a-fA-F]+)/g, `\\u{$1}`);
+  const forceEscape = new RegExp(updatedRegex, "u");
+  data = { escapedCharInfo, forceEscape };
+}
+
+// Preload the static info at module load, so the escaper is usable before updateInfo() is called with other data
+updateInfo(staticInfo);
+
+/** Falsy for empty/missing input; otherwise the result of testing `str` against the current force-escape regex. */
+export function needsEscaping(str) {
+  return str ? data?.forceEscape?.test(str) : false;
+}
+
+/**
+ * Escape any named invisible code points, if needed.
+ * The string is only rewritten when needsEscaping() reports a match.
+ * @param {string} str input string
+ * @returns {string|undefined} the escaped string, or undefined (falsy) if no escaping was needed,
+ *   so the result can serve both as a flag and as the value to display
+ */
+export function getEscapedHtml(str) {
+  const mustEscape = needsEscaping(str);
+  if (!mustEscape) return undefined;
+  return escapeHtml(str);
+}
+
+/** get information for one char: its entry in the current names map, or undefined (not null) if there is none */
+export function getCharInfo(str) {
+ return data?.escapedCharInfo?.names[str];
+}
+
+/** Unconditionally escape (without testing): wraps every force-escaped code point of `str` in a visible-mark span; NOTE(review): other text is passed through as-is. */
+function escapeHtml(str) {
+  // replace() with a non-global regex rewrites only the first match, so work on a global copy
+  const everyMatch = new RegExp(data.forceEscape, "gu");
+  return str.replace(everyMatch, (o) => {
+    const name = getCharInfo(o)?.name ?? `U+${o.codePointAt(0).toString(16).toUpperCase()}`; // hex when unnamed
+    return `<span class="visible-mark">${name}</span>`; // class targeted by the .visible-mark CSS rules
+  });
+}
diff --git a/tools/cldr-apps/js/src/esm/cldrEscaperLoader.mjs b/tools/cldr-apps/js/src/esm/cldrEscaperLoader.mjs
new file mode 100644
index 00000000000..7f468cc19d2
--- /dev/null
+++ b/tools/cldr-apps/js/src/esm/cldrEscaperLoader.mjs
@@ -0,0 +1,9 @@
+import * as cldrEscaper from "./cldrEscaper.mjs";
+import * as cldrClient from "./cldrClient.mjs";
+
+/** Fetch the escaped-character info from the server and hand it to the escaper. */
+export async function updateEscaperFromServer() {
+  const apiClient = await cldrClient.getClient();
+  const response = await apiClient.apis.info.getEscapedCharInfo();
+  cldrEscaper.updateInfo(response.body); // recompiles the escaper's regex from the response body
+}
diff --git a/tools/cldr-apps/js/src/esm/cldrGui.mjs b/tools/cldr-apps/js/src/esm/cldrGui.mjs
index 06efc07440a..ee3dc81c173 100644
--- a/tools/cldr-apps/js/src/esm/cldrGui.mjs
+++ b/tools/cldr-apps/js/src/esm/cldrGui.mjs
@@ -3,6 +3,7 @@
*/
import * as cldrAjax from "./cldrAjax.mjs";
import * as cldrDashContext from "./cldrDashContext.mjs";
+import * as cldrEscaperLoader from "./cldrEscaperLoader.mjs";
import * as cldrEvent from "./cldrEvent.mjs";
import * as cldrForum from "./cldrForum.mjs";
import * as cldrInfo from "./cldrInfo.mjs";
@@ -55,7 +56,19 @@ function run() {
} catch (e) {
return Promise.reject(e);
}
- return ensureSession().then(completeStartupWithSession);
+ // Load the session and the other startup data (see initialSetup) before completing startup
+ return initialSetup().then(completeStartupWithSession);
+}
+
+/** Startup hook: runs all initial loads in parallel. NOTE(review): resolves to undefined, not ensureSession()'s value - confirm completeStartupWithSession needs no argument. */
+async function initialSetup() {
+  const startupTasks = [
+    ensureSession(), // make sure we have a session
+    cldrEscaperLoader.updateEscaperFromServer(), // the server's escaping data
+    // TODO: locale map
+    // TODO: initial menus
+  ];
+  await Promise.all(startupTasks);
}
async function ensureSession() {
diff --git a/tools/cldr-apps/js/src/esm/cldrTable.mjs b/tools/cldr-apps/js/src/esm/cldrTable.mjs
index f29b22b0533..af17e472d7a 100644
--- a/tools/cldr-apps/js/src/esm/cldrTable.mjs
+++ b/tools/cldr-apps/js/src/esm/cldrTable.mjs
@@ -14,6 +14,7 @@ import * as cldrAjax from "./cldrAjax.mjs";
import * as cldrCoverage from "./cldrCoverage.mjs";
import * as cldrDashContext from "./cldrDashContext.mjs";
import * as cldrDom from "./cldrDom.mjs";
+import * as cldrEscaper from "./cldrEscaper.mjs";
import * as cldrEvent from "./cldrEvent.mjs";
import * as cldrGui from "./cldrGui.mjs";
import * as cldrInfo from "./cldrInfo.mjs";
@@ -1010,13 +1011,11 @@ function showItemInfoFn(theRow, item) {
*/
function checkLRmarker(field, value) {
if (value) {
- if (value.indexOf("\u200E") > -1 || value.indexOf("\u200F") > -1) {
- value = value
- .replace(/\u200E/g, '<LRM>')
- .replace(/\u200F/g, '<RLM>');
+ const escapedValue = cldrEscaper.getEscapedHtml(value);
+ if (escapedValue) {
const lrm = document.createElement("div");
lrm.className = "lrmarker-container";
- lrm.innerHTML = value;
+ lrm.innerHTML = escapedValue;
field.appendChild(lrm);
}
}
diff --git a/tools/cldr-apps/js/test/nonbrowser/test-cldrEscaper.mjs b/tools/cldr-apps/js/test/nonbrowser/test-cldrEscaper.mjs
new file mode 100644
index 00000000000..2cb471459f6
--- /dev/null
+++ b/tools/cldr-apps/js/test/nonbrowser/test-cldrEscaper.mjs
@@ -0,0 +1,41 @@
+import { expect } from "chai";
+import mocha from "mocha";
+
+import * as cldrEscaper from "../../src/esm/cldrEscaper.mjs";
+
+/** Formats the first code point of `ch` as "U+" plus lowercase hex; falsy input is returned unchanged. */
+function uplus(ch) {
+  return ch ? `U+${ch.codePointAt(0).toString(16)}` : ch;
+}
+
+describe("cldrEscaper test", function () {
+  describe("LRM/RLM test", function () {
+    for (const ch of ["\u200E", "\u200F", "\uFFF0"]) {
+      it(`returns true for ${uplus(ch)}`, function () {
+        expect(cldrEscaper.needsEscaping(ch)).to.be.ok;
+      });
+    }
+    for (const ch of [undefined, false, null, " ", "X"]) {
+      it(`returns false for ${uplus(ch)}`, function () {
+        expect(cldrEscaper.needsEscaping(ch)).to.not.be.ok;
+      });
+    }
+  });
+  describe("Escaping Test", () => {
+    it(`Should return undefined for a non-escapable str`, () => {
+      expect(cldrEscaper.getEscapedHtml(`dd/MM/y`)).to.not.be.ok;
+    });
+    it(`Should return HTML for an escapable str`, () => {
+      const html = cldrEscaper.getEscapedHtml(`dd\u200F/MM\u200F/y`); // RLM as explicit \u escapes so it cannot get lost
+      expect(html).to.be.ok;
+      expect(html).to.contain('class="visible-mark"');
+      expect(html).to.contain("RLM");
+    });
+    it(`Should return hex for an unknown str`, () => {
+      const html = cldrEscaper.getEscapedHtml(`\uFFF0`); // in the static regex, but has no name entry
+      expect(html).to.be.ok;
+      expect(html).to.contain('class="visible-mark"');
+      expect(html).to.contain("U+FFF0");
+    });
+  });
+});
diff --git a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/api/CharInfo.java b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/api/CharInfo.java
new file mode 100644
index 00000000000..b1918b67573
--- /dev/null
+++ b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/api/CharInfo.java
@@ -0,0 +1,68 @@
+package org.unicode.cldr.web.api;
+
+import java.util.HashMap;
+import java.util.Map;
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import org.eclipse.microprofile.openapi.annotations.Operation;
+import org.eclipse.microprofile.openapi.annotations.media.Content;
+import org.eclipse.microprofile.openapi.annotations.media.Schema;
+import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;
+import org.eclipse.microprofile.openapi.annotations.responses.APIResponses;
+import org.eclipse.microprofile.openapi.annotations.tags.Tag;
+import org.unicode.cldr.util.CodePointEscaper;
+
+@Path("/info/chars")
+@Tag(name = "info", description = "General Information")
+public class CharInfo {
+    /** Responds with the shared {@link EscapedCharInfo#INSTANCE}, serialized as JSON. */
+    @GET
+    @Produces(MediaType.APPLICATION_JSON)
+    @Operation(
+            summary = "Get Escaping Map",
+            description = "This returns a list of escapable characters")
+    @APIResponses(
+            value = {
+                @APIResponse(
+                        responseCode = "200",
+                        description = "Results of Character request",
+                        content =
+                                @Content(
+                                        mediaType = "application/json",
+                                        schema = @Schema(implementation = EscapedCharInfo.class))),
+            })
+    public Response getEscapedCharInfo() {
+        return Response.ok(EscapedCharInfo.INSTANCE).build();
+    }
+
+    /** unpacks the enum into a struct */
+    public static final class EscapedCharEntry {
+        public final String name;
+        public final String shortName;
+        public final String description;
+
+        public EscapedCharEntry(final CodePointEscaper c) {
+            name = c.name();
+            shortName = c.getShortName();
+            description = c.getDescription();
+        }
+    }
+    /** Response payload: the Survey Tool escape pattern, plus an entry for every CodePointEscaper keyed by its string. */
+    public static final class EscapedCharInfo {
+        public final String forceEscapeRegex =
+                CodePointEscaper.ESCAPE_IN_SURVEYTOOL.toPattern(true);
+        public final Map<String, EscapedCharEntry> names = new HashMap<>(); // typed rather than a raw Map
+
+        EscapedCharInfo() {
+            for (final CodePointEscaper c : CodePointEscaper.values()) {
+                names.put(c.getString(), new EscapedCharEntry(c));
+            }
+        }
+
+        /** Constant data, so a singleton is fine */
+        public static final EscapedCharInfo INSTANCE = new EscapedCharInfo();
+    }
+}
diff --git a/tools/cldr-apps/src/main/webapp/css/redesign.css b/tools/cldr-apps/src/main/webapp/css/redesign.css
index 3aa06438e1a..7887c3c78f1 100644
--- a/tools/cldr-apps/src/main/webapp/css/redesign.css
+++ b/tools/cldr-apps/src/main/webapp/css/redesign.css
@@ -242,6 +242,12 @@ h3.collapse-review > span:first-child {position: relative; top: -220px;display:
display: flex;
border: 1px solid #0FF;
}
+.data .visible-mark::before{ /* the leading "<" is generated content, not part of the element's own text */
+ content: '<';
+}
+.data .visible-mark::after{ /* matching trailing ">" */
+ content: '>';
+}
.data .visible-mark{
background-color: #d9edf7;
border-color: #bce8f1;
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
index 5939368771c..4cf287fe960 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
@@ -116,6 +116,10 @@ public enum CodePointEscaper {
.removeAll(EMOJI_INVISIBLES)
.freeze();
+ /** Set to be escaped in the Survey Tool: a frozen copy of FORCE_ESCAPE with SP's code point removed */
+ public static final UnicodeSet ESCAPE_IN_SURVEYTOOL =
+ FORCE_ESCAPE.cloneAsThawed().remove(SP.getCodePoint()).freeze();
+
public static final UnicodeSet NON_SPACING = new UnicodeSet("[[:Mn:][:Me:]]").freeze();
public static final UnicodeSet FORCE_ESCAPE_WITH_NONSPACING =
@@ -254,7 +258,7 @@ public static String toUnescaped(String escaped) {
private static final String HAS_NAME = " ≡ ";
public static String toExample(int codePoint) {
- CodePointEscaper cpe = _fromCodePoint.get(codePoint);
+ CodePointEscaper cpe = forCodePoint(codePoint);
if (cpe == null) { // hex
final String name = UCharacter.getExtendedName(codePoint);
return codePointToEscaped(codePoint)
@@ -267,6 +271,14 @@ public static String toExample(int codePoint) {
}
}
+ static CodePointEscaper forCodePoint(int codePoint) {
+ return _fromCodePoint.get(codePoint); // may be null: toExample() falls back to hex in that case
+ }
+ /** Overload that looks up the first code point of {@code str}. */
+ static CodePointEscaper forCodePoint(String str) {
+ return forCodePoint(str.codePointAt(0));
+ }
+
/**
* Returns a code point from an abbreviation string or hex string without the escape
* brackets
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java
new file mode 100644
index 00000000000..06ffd339d58
--- /dev/null
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java
@@ -0,0 +1,21 @@
+package org.unicode.cldr.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Checks, for every CodePointEscaper constant, the lookup by string and membership in FORCE_ESCAPE.
+ * @see org.unicode.cldr.unittest.UnicodeSetPrettyPrinterTest
+ */
+public class TestCodePointEscaper {
+    @Test
+    void testForEach() {
+        for (final CodePointEscaper escaper : CodePointEscaper.values()) {
+            assertEquals(escaper, CodePointEscaper.forCodePoint(escaper.getString()));
+            final boolean forced = CodePointEscaper.FORCE_ESCAPE.contains(escaper.getCodePoint());
+            assertTrue(forced, () -> "For " + escaper.name());
+        }
+    }
+}