Skip to content

Commit 391b3d2

Browse files
authored
CLDR-16765 Show un printables (#4664)
1 parent 4957d8a commit 391b3d2

File tree

9 files changed

+233
-7
lines changed

9 files changed

+233
-7
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/**
 * Current escaper state, or null before the first updateInfo() call.
 * Shape: { escapedCharInfo, forceEscape, forceEscapeAll }
 */
let data = null;

/** Built-in fallback data: start from static info - useful for tests */
const staticInfo = {
  forceEscapeRegex: "[\\u200e\\u200f\\uFFF0]",
  names: {
    "\u200e": { name: "LRM" },
    "\u200f": { name: "RLM" },
  },
};

/** Characters that are significant in HTML text or attribute values */
const HTML_ENTITIES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/**
 * Make arbitrary text safe for use as HTML text or inside a quoted attribute
 * @param {string} text raw text
 * @returns {string} text with & < > " ' replaced by entities
 */
function escapeMarkup(text) {
  return String(text).replace(/[&<>"']/g, (c) => HTML_ENTITIES[c]);
}

/**
 * Convert the incoming set pattern into JavaScript (unicode mode) regex syntax
 * @param {string} pattern pattern using `\ ` for space and `\Uxxxxxxxx` escapes
 * @returns {string} pattern using a plain space and `\u{x}` escapes
 */
function toJsPattern(pattern) {
  return (
    pattern
      .replace(/\\ /g, " ")
      // take at most 8 hex digits so that a literal hex letter following
      // the escape is not swallowed into the code point
      .replace(
        /\\U([0-9a-fA-F]{1,8})/g,
        (_, hex) => "\\u{" + hex.replace(/^0+(?=.)/, "") + "}"
      )
  );
}

/**
 * Updates content and recompiles the regexes.
 * Throws (leaving the previous data in place) if the pattern is missing or
 * cannot be compiled.
 * @param {object} escapedCharInfo object with `forceEscapeRegex` (string) and
 *   `names` (map of character to { name, shortName, description })
 */
export function updateInfo(escapedCharInfo) {
  const updatedRegex = toJsPattern(escapedCharInfo.forceEscapeRegex);
  // non-global: safe for repeated .test() calls (no lastIndex state)
  const forceEscape = new RegExp(updatedRegex, "u");
  // global: used to visit every occurrence when escaping
  const forceEscapeAll = new RegExp(updatedRegex, "gu");
  data = { escapedCharInfo, forceEscape, forceEscapeAll };
}

// we preload the static info
updateInfo(staticInfo);

/**
 * Does the string contain at least one character that must be shown escaped?
 * @param {string} str input string (falsy input is never escaped)
 * @returns {boolean}
 */
export function needsEscaping(str) {
  if (!str) return false;
  return Boolean(data?.forceEscape?.test(str));
}

/**
 * Escape any named invisible code points, if needed
 * @param {string} str input string
 * @returns {string|undefined} HTML in which every escapable character is replaced by
 *   `<span class="visible-mark" title="...">LRM</span>` and all other text is
 *   HTML-escaped, or undefined if no escaping was needed
 */
export function getEscapedHtml(str) {
  return needsEscaping(str) ? escapeHtml(str) : undefined;
}

/**
 * Get information for one char
 * @param {string} str the character to look up
 * @returns {object|undefined} the entry from the `names` map, or undefined if there is none
 */
export function getCharInfo(str) {
  const names = data?.escapedCharInfo?.names;
  // own properties only, so that e.g. "constructor" does not resolve to Object.prototype
  if (!names || !Object.prototype.hasOwnProperty.call(names, str)) {
    return undefined;
  }
  return names[str];
}

/**
 * Build the visible replacement for one escapable character
 * @param {string} ch the matched character
 * @returns {string} a `<span class="visible-mark">` element
 */
function visibleMark(ch) {
  const info = getCharInfo(ch) || {
    // no name known: fall back to U+XXXX notation
    name: `U+${ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`,
  };
  const title = `${info.shortName || info.name}\n ${info.description || ""}`;
  return `<span class="visible-mark" title="${escapeMarkup(
    title
  )}">${escapeMarkup(info.name)}</span>`;
}

/**
 * Unconditionally escape (without testing).
 * Every match is replaced, not just the first, and the text between matches
 * is HTML-escaped because the result is meant to be used as markup.
 * @param {string} str input string
 * @returns {string} HTML
 */
function escapeHtml(str) {
  const text = String(str);
  const matcher = data?.forceEscapeAll;
  if (!matcher) return escapeMarkup(text);
  let html = "";
  let consumed = 0;
  for (const match of text.matchAll(matcher)) {
    const found = match[0];
    if (!found) continue; // ignore empty matches from an unexpected pattern
    html += escapeMarkup(text.slice(consumed, match.index));
    html += visibleMark(found);
    consumed = match.index + found.length;
  }
  return html + escapeMarkup(text.slice(consumed));
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import * as cldrEscaper from "./cldrEscaper.mjs";
2+
import * as cldrClient from "./cldrClient.mjs";
3+
4+
/**
 * Fetch the escaped-character info from the server and hand it to the escaper.
 * The returned promise rejects if the client, the request, or the update fails.
 */
export async function updateEscaperFromServer() {
  const { apis } = await cldrClient.getClient();
  const response = await apis.info.getEscapedCharInfo();
  // the response body carries the regex and the name map
  cldrEscaper.updateInfo(response.body);
}

tools/cldr-apps/js/src/esm/cldrGui.mjs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44
import * as cldrAjax from "./cldrAjax.mjs";
55
import * as cldrDashContext from "./cldrDashContext.mjs";
6+
import * as cldrEscaperLoader from "./cldrEscaperLoader.mjs";
67
import * as cldrEvent from "./cldrEvent.mjs";
78
import * as cldrForum from "./cldrForum.mjs";
89
import * as cldrInfo from "./cldrInfo.mjs";
@@ -55,7 +56,19 @@ function run() {
5556
} catch (e) {
5657
return Promise.reject(e);
5758
}
58-
return ensureSession().then(completeStartupWithSession);
59+
// Load the session and the other startup data, then complete the startup
60+
return initialSetup().then(completeStartupWithSession);
61+
}
62+
63+
/**
 * Hook for loading all things we want loaded before startup completes.
 * All tasks are started together; the returned promise rejects as soon as
 * any one of them fails.
 */
async function initialSetup() {
  const startupTasks = [
    // make sure that we have a session
    ensureSession(),
    // load the map of characters that are shown escaped
    cldrEscaperLoader.updateEscaperFromServer(),
    // any other things can go here
    // TODO: locale map
    // TODO: initial menus
  ];
  await Promise.all(startupTasks);
}
6073

6174
async function ensureSession() {

tools/cldr-apps/js/src/esm/cldrTable.mjs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import * as cldrAjax from "./cldrAjax.mjs";
1414
import * as cldrCoverage from "./cldrCoverage.mjs";
1515
import * as cldrDashContext from "./cldrDashContext.mjs";
1616
import * as cldrDom from "./cldrDom.mjs";
17+
import * as cldrEscaper from "./cldrEscaper.mjs";
1718
import * as cldrEvent from "./cldrEvent.mjs";
1819
import * as cldrGui from "./cldrGui.mjs";
1920
import * as cldrInfo from "./cldrInfo.mjs";
@@ -1010,13 +1011,11 @@ function showItemInfoFn(theRow, item) {
10101011
*/
10111012
function checkLRmarker(field, value) {
10121013
if (value) {
1013-
if (value.indexOf("\u200E") > -1 || value.indexOf("\u200F") > -1) {
1014-
value = value
1015-
.replace(/\u200E/g, '<span class="visible-mark">&lt;LRM&gt;</span>')
1016-
.replace(/\u200F/g, '<span class="visible-mark">&lt;RLM&gt;</span>');
1014+
const escapedValue = cldrEscaper.getEscapedHtml(value);
1015+
if (escapedValue) {
10171016
const lrm = document.createElement("div");
10181017
lrm.className = "lrmarker-container";
1019-
lrm.innerHTML = value;
1018+
lrm.innerHTML = escapedValue;
10201019
field.appendChild(lrm);
10211020
}
10221021
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import { expect } from "chai";
2+
import mocha from "mocha";
3+
4+
import * as cldrEscaper from "../../src/esm/cldrEscaper.mjs";
5+
6+
/**
 * Format the first code point of a string for use in a test title
 * @param {string} ch string to describe; falsy values are returned unchanged
 * @returns {string} U+XXXX notation (upper case, at least 4 hex digits)
 */
function uplus(ch) {
  if (!ch) return ch;
  const hex = ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0");
  return `U+${hex}`;
}
10+
11+
describe("cldrEscaper test", function () {
  describe("LRM/RLM test", function () {
    // characters covered by the escaper's preloaded static pattern
    for (const ch of ["\u200E", "\u200F", "\uFFF0"]) {
      it(`returns true for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.be.ok;
      });
    }
    // falsy input and ordinary visible characters are never escaped
    for (const ch of [undefined, false, null, " ", "X"]) {
      it(`returns false for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.not.be.ok;
      });
    }
  });
  describe("Escaping Test", () => {
    it(`Should return undefined for a non-escapable str`, () => {
      expect(cldrEscaper.getEscapedHtml(`dd/MM/y`)).to.not.be.ok;
    });
    it(`Should return HTML for an escapable str`, () => {
      // written with escapes so the two U+200F (RLM) are visible in the source
      const html = cldrEscaper.getEscapedHtml(`dd\u200F/MM\u200F/y`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("RLM");
    });
    it(`Should return hex for an unknown str`, () => {
      // U+FFF0 matches the pattern but has no name, so hex is expected
      const html = cldrEscaper.getEscapedHtml(`\uFFF0`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("U+FFF0");
    });
  });
});
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package org.unicode.cldr.web.api;
2+
3+
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
5+
import javax.ws.rs.GET;
6+
import javax.ws.rs.Path;
7+
import javax.ws.rs.Produces;
8+
import javax.ws.rs.core.MediaType;
9+
import javax.ws.rs.core.Response;
10+
import org.eclipse.microprofile.openapi.annotations.Operation;
11+
import org.eclipse.microprofile.openapi.annotations.media.Content;
12+
import org.eclipse.microprofile.openapi.annotations.media.Schema;
13+
import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;
14+
import org.eclipse.microprofile.openapi.annotations.responses.APIResponses;
15+
import org.eclipse.microprofile.openapi.annotations.tags.Tag;
16+
import org.unicode.cldr.util.CodePointEscaper;
17+
18+
@Path("/info/chars")
19+
@Tag(name = "info", description = "General Information")
20+
public class CharInfo {
21+
22+
@GET
23+
@Produces(MediaType.APPLICATION_JSON)
24+
@Operation(
25+
summary = "Get Escaping Map",
26+
description = "This returns a list of escapable characters")
27+
@APIResponses(
28+
value = {
29+
@APIResponse(
30+
responseCode = "200",
31+
description = "Results of Character request",
32+
content =
33+
@Content(
34+
mediaType = "application/json",
35+
schema = @Schema(implementation = EscapedCharInfo.class))),
36+
})
37+
public Response getEscapedCharInfo() {
38+
return Response.ok(EscapedCharInfo.INSTANCE).build();
39+
}
40+
41+
/** unpacks the enum into a struct */
42+
public static final class EscapedCharEntry {
43+
public final String name;
44+
public final String shortName;
45+
public final String description;
46+
47+
public EscapedCharEntry(final CodePointEscaper c) {
48+
name = c.name();
49+
shortName = c.getShortName();
50+
description = c.getDescription();
51+
}
52+
}
53+
54+
public static final class EscapedCharInfo {
55+
public final String forceEscapeRegex =
56+
CodePointEscaper.ESCAPE_IN_SURVEYTOOL.toPattern(true);
57+
public final Map<String, EscapedCharEntry> names = new HashMap<>();
58+
59+
EscapedCharInfo() {
60+
for (final CodePointEscaper c : CodePointEscaper.values()) {
61+
names.put(c.getString(), new EscapedCharEntry(c));
62+
}
63+
}
64+
65+
/** Constant data, so a singleton is fine */
66+
public static final EscapedCharInfo INSTANCE = new EscapedCharInfo();
67+
}
68+
}

tools/cldr-apps/src/main/webapp/css/redesign.css

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,12 @@ h3.collapse-review > span:first-child {position: relative; top: -220px;display:
242242
display: flex;
243243
border: 1px solid #0FF;
244244
}
245+
/* Wrap the text of a visible-mark element in angle brackets, e.g. LRM is shown as <LRM> */
.data .visible-mark::before {
	content: '<';
}
.data .visible-mark::after {
	content: '>';
}
245251
.data .visible-mark{
246252
background-color: #d9edf7;
247253
border-color: #bce8f1;

tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ public enum CodePointEscaper {
116116
.removeAll(EMOJI_INVISIBLES)
117117
.freeze();
118118

119+
/** Set of code points to be escaped in the Survey Tool: FORCE_ESCAPE without the space (SP) */
120+
public static final UnicodeSet ESCAPE_IN_SURVEYTOOL =
121+
FORCE_ESCAPE.cloneAsThawed().remove(SP.getCodePoint()).freeze();
122+
119123
public static final UnicodeSet NON_SPACING = new UnicodeSet("[[:Mn:][:Me:]]").freeze();
120124

121125
public static final UnicodeSet FORCE_ESCAPE_WITH_NONSPACING =
@@ -254,7 +258,7 @@ public static String toUnescaped(String escaped) {
254258
private static final String HAS_NAME = " ≡ ";
255259

256260
public static String toExample(int codePoint) {
257-
CodePointEscaper cpe = _fromCodePoint.get(codePoint);
261+
CodePointEscaper cpe = forCodePoint(codePoint);
258262
if (cpe == null) { // hex
259263
final String name = UCharacter.getExtendedName(codePoint);
260264
return codePointToEscaped(codePoint)
@@ -267,6 +271,14 @@ public static String toExample(int codePoint) {
267271
}
268272
}
269273

274+
static CodePointEscaper forCodePoint(int codePoint) {
275+
return _fromCodePoint.get(codePoint);
276+
}
277+
278+
static CodePointEscaper forCodePoint(String str) {
279+
return forCodePoint(str.codePointAt(0));
280+
}
281+
270282
/**
271283
* Returns a code point from an abbreviation string or hex string <b>without the escape
272284
* brackets</b>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package org.unicode.cldr.util;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertTrue;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
/**
9+
* @see org.unicode.cldr.unittest.UnicodeSetPrettyPrinterTest
10+
*/
11+
public class TestCodePointEscaper {
12+
@Test
13+
void testForEach() {
14+
for (final CodePointEscaper e : CodePointEscaper.values()) {
15+
assertEquals(e, CodePointEscaper.forCodePoint(e.getString()));
16+
assertTrue(
17+
CodePointEscaper.FORCE_ESCAPE.contains(e.getCodePoint()),
18+
() -> "For " + e.name());
19+
}
20+
}
21+
}

0 commit comments

Comments
 (0)