Skip to content

Commit 522890f

Browse files
committed
Improve name table encoding support
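Most name strings should be encoded with UTF-16BE per the spec, but there are situations where other encodings are required or acceptable. This change only addresses a subset of potential encodings: when parsing, records with platform ID 3 and platform-specific ID 2, 3, 4, or 5 are decoded as SJIS, GB18030, BIG-5, or UHC respectively, provided the mbstring extension lists that encoding; all other records are decoded as UTF-16. When encoding, every record is written as platform ID 3, platform-specific ID 1 (UTF-16). Fixes #70.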
1 parent 433396c commit 522890f

File tree

2 files changed

+59
-9
lines changed

2 files changed

+59
-9
lines changed

src/FontLib/Table/Type/name.php

+58-9
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,49 @@ protected function _parse() {
150150
$records[] = $record;
151151
}
152152

153+
$system_encodings = mb_list_encodings();
154+
$system_encodings = array_change_key_case(array_fill_keys($system_encodings, true), CASE_UPPER);
155+
153156
$names = array();
154157
foreach ($records as $record) {
155158
$font->seek($tableOffset + $data["stringOffset"] + $record->offset);
156-
$s = $font->read($record->length);
157-
$record->string = Font::UTF16ToUTF8($s);
159+
$record->stringRaw = $font->read($record->length);
160+
161+
$encoding = null;
162+
switch ($record->platformID) {
163+
case 3:
164+
switch ($record->platformSpecificID) {
165+
case 2:
166+
if (\array_key_exists("SJIS", $system_encodings)) {
167+
$encoding = "SJIS";
168+
}
169+
break;
170+
case 3:
171+
if (\array_key_exists("GB18030", $system_encodings)) {
172+
$encoding = "GB18030";
173+
}
174+
break;
175+
case 4:
176+
if (\array_key_exists("BIG-5", $system_encodings)) {
177+
$encoding = "BIG-5";
178+
}
179+
break;
180+
case 5:
181+
if (\array_key_exists("UHC", $system_encodings)) {
182+
$encoding = "UHC";
183+
}
184+
break;
185+
}
186+
break;
187+
}
188+
if ($encoding === null) {
189+
$encoding = "UTF-16";
190+
}
191+
192+
$record->string = mb_convert_encoding($record->stringRaw, "UTF-8", $encoding);
193+
if (strpos($record->string, "\0") !== false) {
194+
$record->string = str_replace("\0", "", $record->string);
195+
}
158196
$names[$record->nameID] = $record;
159197
}
160198

@@ -168,22 +206,33 @@ protected function _encode() {
168206

169207
/** @var nameRecord[] $records */
170208
$records = $this->data["records"];
171-
$count_records = count($records);
209+
$count_records = \count($records);
172210

173211
$this->data["count"] = $count_records;
174-
$this->data["stringOffset"] = 6 + $count_records * 12; // 6 => uint16 * 3, 12 => sizeof self::$record_format
212+
$this->data["stringOffset"] = 6 + ($count_records * 12); // 6 => uint16 * 3, 12 => sizeof self::$record_format
175213

176214
$length = $font->pack(self::$header_format, $this->data);
177215

178216
$offset = 0;
217+
218+
/** @var nameRecord[] $records_to_encode */
219+
$records_to_encode = array();
179220
foreach ($records as $record) {
180-
$record->length = mb_strlen($record->getUTF16(), "8bit");
181-
$record->offset = $offset;
182-
$offset += $record->length;
183-
$length += $font->pack(nameRecord::$format, (array)$record);
221+
$encoded_record = new nameRecord();
222+
$encoded_record->platformID = 3;
223+
$encoded_record->platformSpecificID = 1;
224+
$encoded_record->languageID = $record->languageID;
225+
$encoded_record->nameID = $record->nameID;
226+
$encoded_record->offset = $offset;
227+
$encoded_record->string = $record->string;
228+
$encoded_record->length = mb_strlen($encoded_record->getUTF16(), "8bit");
229+
$records_to_encode[] = $encoded_record;
230+
231+
$offset += $encoded_record->length;
232+
$length += $font->pack(nameRecord::$format, (array)$encoded_record);
184233
}
185234

186-
foreach ($records as $record) {
235+
foreach ($records_to_encode as $record) {
187236
$str = $record->getUTF16();
188237
$length += $font->write($str, mb_strlen($str, "8bit"));
189238
}

src/FontLib/Table/Type/nameRecord.php

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class nameRecord extends BinaryStream {
2323
public $length;
2424
public $offset;
2525
public $string;
26+
public $stringRaw;
2627

2728
public static $format = array(
2829
"platformID" => self::uint16,

0 commit comments

Comments
 (0)