Skip to content

Commit 4294913

Browse files
committed
Generate Unicode character map for non-Unicode cmap tables
This library relies on Unicode data for a number of functions (namely font re-encoding). Without a Unicode-encoded cmap table those functions do not work as expected.
1 parent cadff2e commit 4294913

File tree

1 file changed

+112
-1
lines changed

1 file changed

+112
-1
lines changed

src/FontLib/TrueType/File.php

+112-1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,72 @@ class File extends BinaryStream {
101101
"Ccaron", "ccaron", "dmacron"
102102
);
103103

104+
/**
 * Return the Unicode code point of the first character of a string.
 *
 * Delegates to mb_ord() when that function exists. Otherwise the string is
 * converted to UTF-8 (when a different source encoding is given) and the
 * leading UTF-8 sequence is decoded by hand.
 *
 * @param string      $c        String whose first character is examined.
 * @param string|null $encoding Encoding of $c. When null, the fallback
 *                              decoder and mb_ord() on PHP < 8 treat $c as
 *                              UTF-8; on PHP >= 8 null is passed through to
 *                              mb_ord() unchanged.
 *
 * @return int|false The code point, or false when $c is empty or its first
 *                   character cannot be decoded.
 */
private function uniord (string $c, ?string $encoding = null) {
  if ($c === "") {
    // mb_ord() does not accept an empty string; report it the same way the
    // fallback decoder reports any other undecodable input.
    return false;
  }

  if (function_exists("mb_ord")) {
    if (PHP_VERSION_ID < 80000 && $encoding === null) {
      // in PHP < 8 the encoding argument, if supplied, must be a valid encoding
      $encoding = "UTF-8";
    }
    return mb_ord($c, $encoding);
  }

  if ($encoding !== null && $encoding !== "UTF-8") {
    $c = mb_convert_encoding($c, "UTF-8", $encoding);
    if (!\is_string($c) || $c === "") {
      return false;
    }
  }

  // Decode the leading UTF-8 sequence straight from the bytes. The sequence
  // length is taken from the lead byte, so the result does not depend on the
  // mbstring internal encoding.
  $lead = \ord($c[0]);
  if ($lead <= 0x7F) {
    return $lead; // single-byte (ASCII) character
  }

  if (($lead >> 0x05) === 0x06) { // 2 bytes character (110xxxxx)
    $numbytes = 2;
    $ord = $lead & 0x1F;
  } elseif (($lead >> 0x04) === 0x0E) { // 3 bytes character (1110xxxx)
    $numbytes = 3;
    $ord = $lead & 0x0F;
  } elseif (($lead >> 0x03) === 0x1E) { // 4 bytes character (11110xxx)
    $numbytes = 4;
    $ord = $lead & 0x07;
  } else {
    // stray continuation byte or a lead byte that UTF-8 does not define
    return false;
  }

  if (\strlen($c) < $numbytes) {
    return false; // truncated sequence
  }

  for ($i = 1; $i < $numbytes; $i++) {
    $o = \ord($c[$i]);
    if (($o >> 0x06) !== 0x02) { // continuation bytes must look like 10xxxxxx
      return false;
    }
    $ord = ($ord << 0x06) | ($o & 0x3F);
  }

  // The definition of UTF-8 prohibits encoding character numbers between
  // U+D800 and U+DFFF, which are reserved for use with the UTF-16 encoding
  // form (as surrogate pairs). U+10FFFF is the last valid code point, so only
  // values above it are rejected.
  if (($ord >= 0xD800 && $ord <= 0xDFFF) || $ord > 0x10FFFF) {
    return false;
  }

  return $ord;
}
169+
104170
function getTable() {
105171
$this->parseTableEntries();
106172

@@ -157,7 +223,7 @@ function utf8toUnicode($str) {
157223
function getUnicodeCharMap() {
158224
$subtable = null;
159225
foreach ($this->getData("cmap", "subtables") as $_subtable) {
160-
if ($_subtable["platformID"] == 0 || $_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1) {
226+
if ($_subtable["platformID"] == 0 || ($_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1)) {
161227
$subtable = $_subtable;
162228
break;
163229
}
@@ -167,6 +233,51 @@ function getUnicodeCharMap() {
167233
return $subtable["glyphIndexArray"];
168234
}
169235

236+
// No Unicode subtable was found: try to build a Unicode character map from a
// legacy platformID 3 subtable by converting each character code to Unicode.
// Keys are platformSpecificID values, values the mbstring encoding used for
// the conversion (per the OpenType cmap specification these IDs are
// ShiftJIS, PRC, Big5 and Wansung respectively).
$legacy_encodings = array(
  2 => "SJIS",
  3 => "GB18030",
  4 => "BIG-5",
  5 => "UHC",
);

// Encodings supported by this mbstring build, keyed by upper-cased name.
$system_encodings = array_change_key_case(array_fill_keys(mb_list_encodings(), true), CASE_UPPER);

foreach ($this->getData("cmap", "subtables") as $_subtable) {
  if ($_subtable["platformID"] != 3) {
    continue;
  }

  $encoding_id = $_subtable["platformSpecificID"];
  if (!isset($legacy_encodings[$encoding_id])) {
    continue;
  }

  $encoding = $legacy_encodings[$encoding_id];
  if (!isset($system_encodings[$encoding])) {
    // mbstring cannot convert from this encoding; try the next subtable
    continue;
  }

  $glyphIndexArray = array();
  foreach ($_subtable["glyphIndexArray"] as $c => $gid) {
    // Turn the numeric character code into its byte sequence. Only the
    // leading NUL padding added by pack() is removed: a plain trim() would
    // also strip whitespace bytes and so drop the space (0x20), tab and
    // line-break characters from the map.
    $str = ltrim(pack("N", $c), "\0");
    if ($str === "") {
      continue;
    }

    $ord = $this->uniord($str, $encoding);
    if ($ord !== false && $ord > 0) {
      $glyphIndexArray[$ord] = $gid;
    }
  }

  return $glyphIndexArray;
}
280+
170281
return null;
171282
}
172283

0 commit comments

Comments
 (0)