Skip to content

CLDR-16765 Show un printables #4664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions tools/cldr-apps/js/src/esm/cldrEscaper.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
 * Current escaper state, replaced wholesale by updateInfo():
 * `{ escapedCharInfo, forceEscape, forceEscapeAll }`.
 */
let data = null;

const staticInfo = {
  forceEscapeRegex: "[\\u200e\\u200f\\uFFF0]",
  names: {
    "\u200e": { name: "LRM" },
    "\u200f": { name: "RLM" },
  },
}; // start from static info - useful for tests

/** Characters that must never appear raw in HTML text or attribute values */
const HTML_ENTITIES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/**
 * Escape HTML-significant characters so the text is safe both as element
 * content and inside a double-quoted attribute.
 * @param {string} text raw text
 * @returns {string} text with & < > " ' replaced by entities
 */
function escapeText(text) {
  return String(text).replace(/[&<>"']/g, (c) => HTML_ENTITIES[c]);
}

/**
 * Updates content and recompiles the regex.
 *
 * The incoming pattern may contain escapes that a JavaScript `u`-mode regex
 * rejects, so they are translated first:
 * - `\ ` (escaped space) becomes a plain space
 * - `\Uhhhhhhhh` becomes `\u{hhhhhhhh}`; at most 8 hex digits are consumed so
 *   that a hex-looking literal following the escape is left alone
 *
 * @param {{forceEscapeRegex: string, names: Object}} escapedCharInfo
 *   pattern of characters to escape, plus a map from character to
 *   `{ name, shortName, description }`
 * @throws {SyntaxError} if the translated pattern is not a valid regex; the
 *   previously loaded info stays in effect in that case
 */
export function updateInfo(escapedCharInfo) {
  const updatedRegex = escapedCharInfo.forceEscapeRegex
    .replace(/\\ /g, " ")
    .replace(/\\U([0-9a-fA-F]{1,8})/g, "\\u{$1}");
  // Non-global regex for test(): it keeps no lastIndex state between calls.
  const forceEscape = new RegExp(updatedRegex, "u");
  // Global twin, used to visit EVERY occurrence when escaping.
  const forceEscapeAll = new RegExp(updatedRegex, "gu");
  data = { escapedCharInfo, forceEscape, forceEscapeAll };
}

// we preload the static info
updateInfo(staticInfo);

/**
 * Does the string contain at least one character that must be shown escaped?
 * @param {string} str input string; falsy input never needs escaping
 * @returns {boolean} true if any character matches the force-escape pattern
 */
export function needsEscaping(str) {
  if (!str) return false;
  return data?.forceEscape?.test(str) ?? false;
}

/**
 * Escape any named invisible code points, if needed
 * @param {string} str input string
 * @returns escaped HTML in which each matched character is replaced by
 *   `<span class="visible-mark" title="…">LRM</span>` (the surrounding text is
 *   HTML-escaped), or undefined if no escaping was needed
 */
export function getEscapedHtml(str) {
  if (needsEscaping(str)) {
    return escapeHtml(str);
  }
  return undefined;
}

/**
 * Get information for one char.
 * @param {string} str a single character (as matched by the escape pattern)
 * @returns the `{ name, shortName, description }` entry, or undefined if unknown
 */
export function getCharInfo(str) {
  return data?.escapedCharInfo?.names?.[str];
}

/**
 * Build the visible stand-in for one matched character.
 * Unknown characters fall back to a `U+XXXX` label (at least 4 hex digits).
 */
function visibleMark(ch) {
  const info = getCharInfo(ch) || {
    name: `U+${ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`,
  };
  const title = `${info.shortName || info.name}\n ${info.description || ""}`;
  return `<span class="visible-mark" title="${escapeText(title)}">${escapeText(
    info.name
  )}</span>`;
}

/** Unconditionally escape (without testing) */
function escapeHtml(str) {
  let html = "";
  let done = 0; // index just past the last character already emitted
  for (const match of str.matchAll(data.forceEscapeAll)) {
    const [ch] = match;
    // Text between marks is plain text: escape it, since callers treat the
    // result as HTML.
    html += escapeText(str.slice(done, match.index)) + visibleMark(ch);
    done = match.index + ch.length;
  }
  return html + escapeText(str.slice(done));
}
9 changes: 9 additions & 0 deletions tools/cldr-apps/js/src/esm/cldrEscaperLoader.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import * as cldrEscaper from "./cldrEscaper.mjs";
import * as cldrClient from "./cldrClient.mjs";

/**
 * Fetch the escaped-character info from the server and hand it to the
 * escaper, which recompiles its regex from it.
 * @returns {Promise<void>} settles once the escaper has been updated
 */
export async function updateEscaperFromServer() {
  const client = await cldrClient.getClient();
  const response = await client.apis.info.getEscapedCharInfo();
  const { body } = response;
  cldrEscaper.updateInfo(body);
}
15 changes: 14 additions & 1 deletion tools/cldr-apps/js/src/esm/cldrGui.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*/
import * as cldrAjax from "./cldrAjax.mjs";
import * as cldrDashContext from "./cldrDashContext.mjs";
import * as cldrEscaperLoader from "./cldrEscaperLoader.mjs";
import * as cldrEvent from "./cldrEvent.mjs";
import * as cldrForum from "./cldrForum.mjs";
import * as cldrInfo from "./cldrInfo.mjs";
Expand Down Expand Up @@ -55,7 +56,19 @@ function run() {
} catch (e) {
return Promise.reject(e);
}
return ensureSession().then(completeStartupWithSession);
// Load everything needed at startup (see initialSetup) before completing startup
return initialSetup().then(completeStartupWithSession);
}

/**
 * Startup hook: kicks off everything that must be loaded before the GUI can
 * finish starting, and waits for all of it. The tasks are started together
 * and awaited as a group, so a failure of any one rejects the whole setup.
 */
async function initialSetup() {
  const startupTasks = [
    // make sure we have a session
    ensureSession(),
    // load the escaper's character info from the server
    cldrEscaperLoader.updateEscaperFromServer(),
    // further startup tasks can be added here
    // TODO: locale map
    // TODO: initial menus
  ];
  await Promise.all(startupTasks);
}

async function ensureSession() {
Expand Down
9 changes: 4 additions & 5 deletions tools/cldr-apps/js/src/esm/cldrTable.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import * as cldrAjax from "./cldrAjax.mjs";
import * as cldrCoverage from "./cldrCoverage.mjs";
import * as cldrDashContext from "./cldrDashContext.mjs";
import * as cldrDom from "./cldrDom.mjs";
import * as cldrEscaper from "./cldrEscaper.mjs";
import * as cldrEvent from "./cldrEvent.mjs";
import * as cldrGui from "./cldrGui.mjs";
import * as cldrInfo from "./cldrInfo.mjs";
Expand Down Expand Up @@ -1010,13 +1011,11 @@ function showItemInfoFn(theRow, item) {
*/
function checkLRmarker(field, value) {
if (value) {
if (value.indexOf("\u200E") > -1 || value.indexOf("\u200F") > -1) {
value = value
.replace(/\u200E/g, '<span class="visible-mark">&lt;LRM&gt;</span>')
.replace(/\u200F/g, '<span class="visible-mark">&lt;RLM&gt;</span>');
const escapedValue = cldrEscaper.getEscapedHtml(value);
if (escapedValue) {
const lrm = document.createElement("div");
lrm.className = "lrmarker-container";
lrm.innerHTML = value;
lrm.innerHTML = escapedValue;
field.appendChild(lrm);
}
}
Expand Down
41 changes: 41 additions & 0 deletions tools/cldr-apps/js/test/nonbrowser/test-cldrEscaper.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { expect } from "chai";
import mocha from "mocha";

import * as cldrEscaper from "../../src/esm/cldrEscaper.mjs";

/**
 * Format the first code point of a string as `U+<hex>` (lowercase hex, no
 * padding) for use in test titles. Falsy input is returned unchanged.
 * @param {string} ch string whose first code point is formatted
 * @returns the `U+` label, or the input itself when it is falsy
 */
function uplus(ch) {
  return ch ? `U+${ch.codePointAt(0).toString(16)}` : ch;
}

// Tests run against the static info that cldrEscaper preloads
// (LRM, RLM and the unnamed U+FFF0).
describe("cldrEscaper test", function () {
  describe("LRM/RLM test", function () {
    // characters covered by the preloaded escape pattern
    for (const ch of ["\u200E", "\u200F", "\uFFF0"]) {
      it(`returns true for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.be.ok;
      });
    }
    // falsy input and ordinary visible characters need no escaping
    for (const ch of [undefined, false, null, " ", "X"]) {
      it(`returns false for ${uplus(ch)}`, function () {
        expect(cldrEscaper.needsEscaping(ch)).to.not.be.ok;
      });
    }
  });
  describe("Escaping Test", () => {
    it(`Should return undefined for a non-escapable str`, () => {
      expect(cldrEscaper.getEscapedHtml(`dd/MM/y`)).to.not.be.ok;
    });
    it(`Should return HTML for an escapable str`, () => {
      // Spell the invisible RLMs as escapes so they cannot be lost or
      // overlooked when this file is edited.
      const html = cldrEscaper.getEscapedHtml(`dd\u200F/MM\u200F/y`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("RLM");
    });
    it(`Should return hex for an unknown str`, () => {
      // U+FFF0 is in the escape pattern but has no name entry
      const html = cldrEscaper.getEscapedHtml(`\uFFF0`);
      expect(html).to.be.ok;
      expect(html).to.contain('class="visible-mark"');
      expect(html).to.contain("U+FFF0");
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package org.unicode.cldr.web.api;

import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.media.Content;
import org.eclipse.microprofile.openapi.annotations.media.Schema;
import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;
import org.eclipse.microprofile.openapi.annotations.responses.APIResponses;
import org.eclipse.microprofile.openapi.annotations.tags.Tag;
import org.unicode.cldr.util.CodePointEscaper;

@Path("/info/chars")
@Tag(name = "info", description = "General Information")
public class CharInfo {

    /**
     * Endpoint returning the escaping information as JSON.
     *
     * @return a 200 response whose entity is the shared {@link EscapedCharInfo} instance
     */
    @GET
    @Produces(MediaType.APPLICATION_JSON)
    @Operation(
            summary = "Get Escaping Map",
            description = "This returns a list of escapable characters")
    @APIResponses(
            value = {
                @APIResponse(
                        responseCode = "200",
                        description = "Results of Character request",
                        content =
                                @Content(
                                        mediaType = "application/json",
                                        schema = @Schema(implementation = EscapedCharInfo.class))),
            })
    public Response getEscapedCharInfo() {
        final EscapedCharInfo payload = EscapedCharInfo.INSTANCE;
        return Response.ok(payload).build();
    }

    /** Plain data holder copied from one {@link CodePointEscaper} enum constant. */
    public static final class EscapedCharEntry {
        /** the enum constant's name */
        public final String name;

        /** value of {@link CodePointEscaper#getShortName()} */
        public final String shortName;

        /** value of {@link CodePointEscaper#getDescription()} */
        public final String description;

        public EscapedCharEntry(final CodePointEscaper escaper) {
            this.name = escaper.name();
            this.shortName = escaper.getShortName();
            this.description = escaper.getDescription();
        }
    }

    /** Response payload: the escape pattern plus one entry per {@link CodePointEscaper}. */
    public static final class EscapedCharInfo {
        /** pattern form of {@link CodePointEscaper#ESCAPE_IN_SURVEYTOOL} */
        public final String forceEscapeRegex;

        /** entries keyed by each escaper's string ({@link CodePointEscaper#getString()}) */
        public final Map<String, EscapedCharEntry> names;

        EscapedCharInfo() {
            this.forceEscapeRegex = CodePointEscaper.ESCAPE_IN_SURVEYTOOL.toPattern(true);
            this.names = new HashMap<>();
            for (final CodePointEscaper escaper : CodePointEscaper.values()) {
                final EscapedCharEntry entry = new EscapedCharEntry(escaper);
                this.names.put(escaper.getString(), entry);
            }
        }

        /** Constant data, so a singleton is fine */
        public static final EscapedCharInfo INSTANCE = new EscapedCharInfo();
    }
}
6 changes: 6 additions & 0 deletions tools/cldr-apps/src/main/webapp/css/redesign.css
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,12 @@ h3.collapse-review > span:first-child {position: relative; top: -220px;display:
display: flex;
border: 1px solid #0FF;
}
/* Surround each visible mark's label with angle brackets using generated
   content, so the brackets are not part of the element's own text. */
.data .visible-mark::before{
content: '<';
}
.data .visible-mark::after{
content: '>';
}
.data .visible-mark{
background-color: #d9edf7;
border-color: #bce8f1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ public enum CodePointEscaper {
.removeAll(EMOJI_INVISIBLES)
.freeze();

/**
 * Set of code points to be escaped in the Survey Tool: a frozen copy of {@link #FORCE_ESCAPE}
 * with the code point of {@link #SP} removed.
 */
public static final UnicodeSet ESCAPE_IN_SURVEYTOOL =
        new UnicodeSet(FORCE_ESCAPE).remove(SP.getCodePoint()).freeze();

public static final UnicodeSet NON_SPACING = new UnicodeSet("[[:Mn:][:Me:]]").freeze();

public static final UnicodeSet FORCE_ESCAPE_WITH_NONSPACING =
Expand Down Expand Up @@ -254,7 +258,7 @@ public static String toUnescaped(String escaped) {
private static final String HAS_NAME = " ≡ ";

public static String toExample(int codePoint) {
CodePointEscaper cpe = _fromCodePoint.get(codePoint);
CodePointEscaper cpe = forCodePoint(codePoint);
if (cpe == null) { // hex
final String name = UCharacter.getExtendedName(codePoint);
return codePointToEscaped(codePoint)
Expand All @@ -267,6 +271,14 @@ public static String toExample(int codePoint) {
}
}

/**
 * Look up the escaper registered for a code point.
 *
 * @param codePoint the code point to look up
 * @return whatever {@code _fromCodePoint} holds for that code point
 */
static CodePointEscaper forCodePoint(int codePoint) {
    final CodePointEscaper found = _fromCodePoint.get(codePoint);
    return found;
}

/**
 * Look up the escaper for the first code point of a string.
 *
 * @param str the string to inspect; only its first code point is used
 * @return the result of {@link #forCodePoint(int)} for that code point, or null if {@code str}
 *     is null or empty (there is no code point to look up)
 */
static CodePointEscaper forCodePoint(String str) {
    // codePointAt(0) would throw on an empty string, so treat "no input" as "no escaper"
    if (str == null || str.isEmpty()) {
        return null;
    }
    return forCodePoint(str.codePointAt(0));
}

/**
* Returns a code point from an abbreviation string or hex string <b>without the escape
* brackets</b>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.unicode.cldr.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.Test;

/**
 * Consistency checks over every {@link CodePointEscaper} constant.
 *
 * @see org.unicode.cldr.unittest.UnicodeSetPrettyPrinterTest
 */
public class TestCodePointEscaper {
    /**
     * Each constant must be found again when looked up by its own string, and its code point
     * must be contained in {@link CodePointEscaper#FORCE_ESCAPE}.
     */
    @Test
    void testForEach() {
        for (final CodePointEscaper escaper : CodePointEscaper.values()) {
            final CodePointEscaper lookedUp = CodePointEscaper.forCodePoint(escaper.getString());
            assertEquals(escaper, lookedUp);

            final boolean isForceEscaped =
                    CodePointEscaper.FORCE_ESCAPE.contains(escaper.getCodePoint());
            assertTrue(isForceEscaped, () -> "For " + escaper.name());
        }
    }
}
Loading