/**
 * Escaper for invisible / confusable code points.
 *
 * The set of characters to escape and their short names start out as a small
 * built-in table and can be replaced at runtime via updateInfo().
 */

/**
 * Current escaper state: `{ escapedCharInfo, forceEscape, forceEscapeAll }`.
 * Assigned by updateInfo(); populated from staticInfo when the module loads.
 */
let data = null;

const staticInfo = {
  forceEscapeRegex: "[\\u200e\\u200f\\uFFF0]",
  names: {
    "\u200e": { name: "LRM" },
    "\u200f": { name: "RLM" },
  },
}; // start from static info - useful for tests

/** Characters that must never reach innerHTML unescaped. */
const HTML_ENTITIES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/**
 * Escape HTML metacharacters in plain text.
 * @param {string} text
 * @returns {string} text that is safe to insert as HTML
 */
function escapeText(text) {
  return text.replace(/[&<>"']/g, (c) => HTML_ENTITIES[c]);
}

/**
 * Replace the escaper's data and recompile its regular expressions.
 *
 * The incoming pattern may use two escapes that JavaScript does not accept in
 * `u` mode: a backslash-escaped space and `\UXXXXXXXX`. Both are rewritten
 * before compiling. Nothing is changed if the pattern fails to compile.
 *
 * @param {{forceEscapeRegex: string, names: Object}} escapedCharInfo
 * @throws {TypeError} if forceEscapeRegex is missing or not a string
 * @throws {SyntaxError} if the rewritten pattern is not a valid regex
 */
export function updateInfo(escapedCharInfo) {
  const pattern = escapedCharInfo?.forceEscapeRegex;
  if (typeof pattern !== "string") {
    throw new TypeError(
      "updateInfo: escapedCharInfo.forceEscapeRegex must be a string"
    );
  }
  const updatedRegex = pattern
    .replace(/\\ /g, " ")
    .replace(/\\U0*([0-9a-fA-F]+)/g, "\\u{$1}");
  // Two compiled forms: a non-global one for test(), which would otherwise be
  // stateful through lastIndex, and a global one for replacing every match.
  const forceEscape = new RegExp(updatedRegex, "u");
  const forceEscapeAll = new RegExp(updatedRegex, "gu");
  data = { escapedCharInfo, forceEscape, forceEscapeAll };
}

// we preload the static info
updateInfo(staticInfo);

/**
 * @param {string} str input string
 * @returns {boolean} true if str contains at least one character to escape
 */
export function needsEscaping(str) {
  if (!str) return false;
  return Boolean(data?.forceEscape?.test(str));
}

/**
 * Escape any named invisible code points, if needed.
 * @param {string} str input string
 * @returns {string|undefined} HTML in which every forced-escape character is
 *   replaced by `<span class="visible-mark">NAME</span>` and all other text is
 *   HTML-escaped, or undefined if no escaping was needed
 */
export function getEscapedHtml(str) {
  if (!needsEscaping(str)) {
    return undefined;
  }
  return escapeHtml(str);
}

/**
 * Get information for one character.
 * @param {string} str a single character (code point) as a string
 * @returns {Object|undefined} the entry from `names`, or undefined if unknown
 */
export function getCharInfo(str) {
  const names = data?.escapedCharInfo?.names;
  // Own-property check so that keys such as "constructor" are not resolved
  // through Object.prototype.
  if (!names || !Object.prototype.hasOwnProperty.call(names, str)) {
    return undefined;
  }
  return names[str];
}

/**
 * Build the visible marker for one escaped character.
 * Falls back to `U+XXXX` notation when the character has no known name.
 * @param {string} ch the matched character
 * @returns {string} HTML span
 */
function visibleMark(ch) {
  const name =
    getCharInfo(ch)?.name ??
    `U+${ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`;
  return `<span class="visible-mark">${escapeText(String(name))}</span>`;
}

/** Unconditionally escape (without testing) */
function escapeHtml(str) {
  const text = String(str);
  let html = "";
  let consumed = 0;
  for (const match of text.matchAll(data.forceEscapeAll)) {
    const [ch] = match;
    html += escapeText(text.slice(consumed, match.index)) + visibleMark(ch);
    consumed = match.index + ch.length;
  }
  return html + escapeText(text.slice(consumed));
}
/**
 * Startup hook: kicks off everything that must be loaded before the rest of
 * startup continues, and waits for all of it.
 *
 * Both tasks are started before either is awaited, so they run concurrently.
 * If any task rejects, the returned promise rejects. Resolves to undefined.
 *
 * TODO: locale map
 * TODO: initial menus
 */
async function initialSetup() {
  const startupTasks = [
    // make sure that we have a session
    ensureSession(),
    // fetch the escaped-character table for cldrEscaper
    cldrEscaperLoader.updateEscaperFromServer(),
    // any other things can go here
  ];
  await Promise.all(startupTasks);
}
/**
 * Format the first code point of a string as "U+<hex>" for use in test titles.
 * Falsy inputs (undefined, null, false, "") are returned unchanged.
 * @param {string} ch
 * @returns {string} e.g. "U+200e"
 */
function uplus(ch) {
  if (!ch) return ch;
  return "U+" + Number(ch.codePointAt(0)).toString(16);
}

describe("cldrEscaper test", function () {
  describe("LRM/RLM test", function () {
    // characters covered by the escaper's built-in (static) regex
    for (const ch of ["\u200E", "\u200F", "\uFFF0"]) {
      it(`returns true for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.be.ok;
      });
    }
    // falsy inputs and ordinary visible characters need no escaping
    for (const ch of [undefined, false, null, " ", "X"]) {
      it(`returns false for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.not.be.ok;
      });
    }
  });
  describe("Escaping Test", () => {
    it(`Should return undefined for a non-escapable str`, () => {
      expect(cldrEscaper.getEscapedHtml(`dd/MM/y`)).to.not.be.ok;
    });
    it(`Should return HTML for an escapable str`, () => {
      // Written with \u escapes so the two RLM characters are visible in source.
      const html = cldrEscaper.getEscapedHtml(`dd\u200F/MM\u200F/y`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("RLM");
    });
    it(`Should return hex for a unknown str`, () => {
      // U+FFF0 is in the escape set but has no entry in the name table.
      const html = cldrEscaper.getEscapedHtml(`\uFFF0`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("U+FFF0");
    });
  });
});
CodePointEscaper c) { + name = c.name(); + shortName = c.getShortName(); + description = c.getDescription(); + } + } + + public static final class EscapedCharInfo { + public final String forceEscapeRegex = + CodePointEscaper.ESCAPE_IN_SURVEYTOOL.toPattern(true); + public final Map names = new HashMap<>(); + + EscapedCharInfo() { + for (final CodePointEscaper c : CodePointEscaper.values()) { + names.put(c.getString(), new EscapedCharEntry(c)); + } + } + + /** Constant data, so a singleton is fine */ + public static final EscapedCharInfo INSTANCE = new EscapedCharInfo(); + } +} diff --git a/tools/cldr-apps/src/main/webapp/css/redesign.css b/tools/cldr-apps/src/main/webapp/css/redesign.css index 3aa06438e1a..7887c3c78f1 100644 --- a/tools/cldr-apps/src/main/webapp/css/redesign.css +++ b/tools/cldr-apps/src/main/webapp/css/redesign.css @@ -242,6 +242,12 @@ h3.collapse-review > span:first-child {position: relative; top: -220px;display: display: flex; border: 1px solid #0FF; } +.data .visible-mark::before{ + content: '<'; +} +.data .visible-mark::after{ + content: '>'; +} .data .visible-mark{ background-color: #d9edf7; border-color: #bce8f1; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java index 5939368771c..4cf287fe960 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java @@ -116,6 +116,10 @@ public enum CodePointEscaper { .removeAll(EMOJI_INVISIBLES) .freeze(); + /** set to be escaped in the surveytool */ + public static final UnicodeSet ESCAPE_IN_SURVEYTOOL = + FORCE_ESCAPE.cloneAsThawed().remove(SP.getCodePoint()).freeze(); + public static final UnicodeSet NON_SPACING = new UnicodeSet("[[:Mn:][:Me:]]").freeze(); public static final UnicodeSet FORCE_ESCAPE_WITH_NONSPACING = @@ -254,7 +258,7 @@ public static String toUnescaped(String escaped) { 
private static final String HAS_NAME = " ≡ "; public static String toExample(int codePoint) { - CodePointEscaper cpe = _fromCodePoint.get(codePoint); + CodePointEscaper cpe = forCodePoint(codePoint); if (cpe == null) { // hex final String name = UCharacter.getExtendedName(codePoint); return codePointToEscaped(codePoint) @@ -267,6 +271,14 @@ public static String toExample(int codePoint) { } } + static CodePointEscaper forCodePoint(int codePoint) { + return _fromCodePoint.get(codePoint); + } + + static CodePointEscaper forCodePoint(String str) { + return forCodePoint(str.codePointAt(0)); + } + /** * Returns a code point from an abbreviation string or hex string without the escape * brackets diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java new file mode 100644 index 00000000000..06ffd339d58 --- /dev/null +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCodePointEscaper.java @@ -0,0 +1,21 @@ +package org.unicode.cldr.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +/** + * @see org.unicode.cldr.unittest.UnicodeSetPrettyPrinterTest + */ +public class TestCodePointEscaper { + @Test + void testForEach() { + for (final CodePointEscaper e : CodePointEscaper.values()) { + assertEquals(e, CodePointEscaper.forCodePoint(e.getString())); + assertTrue( + CodePointEscaper.FORCE_ESCAPE.contains(e.getCodePoint()), + () -> "For " + e.name()); + } + } +}