forked from xndcn/pebble-firmware-utils
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathtranslate.py
executable file
·556 lines (530 loc) · 26.6 KB
/
translate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
#!/usr/bin/env python
# This script updates strings in tintin_fw.bin file
import sys
from struct import pack, unpack
# data holds the raw contents of the loaded tintin_fw file (filled in by translate_fw)
data = ""
# datap holds the original file reinterpreted as a list of integers (candidate pointers):
# translate_fw unpacks one 4-byte value per byte offset, so datap[i] is the value read at offset i
datap = []
# datar is the data to return: a copy of data that gets patched and is finally written to the output
datar = ""
# End offset of the "append" range at the end of file: translate_fw reports 0x70000 as the max fw size
# and re-adds the last 48 bytes of the input after anything that was appended
EOF = 0x70000 - 48
# where to write logs (translate_fw switches this to stderr when the firmware itself goes to stdout)
log = sys.stdout
def is_valid_pointer(n):
    """
    Tells whether a number could be an address inside the loaded firmware,
    i.e. lies in the window of len(data) bytes starting at 0x08010000
    """
    base = 0x08010000
    return base <= n < base + len(data)
def is_string_pointer(ptr):
    """
    Decides whether a number points to something that looks like a string.
    Returns the string itself (it may be empty) when ptr is a valid pointer
    to a \\0-terminated run of tabs, line ends and printable latin characters;
    returns False in every other case.
    """
    if not is_valid_pointer(ptr):
        return False
    start = ptr - 0x08010000
    end = data.find('\0', start)
    if end == -1:
        return False # reached end of file without a terminator
    candidate = data[start:end]
    for ch in candidate:
        printable = ' ' <= ch <= '~' # printable latin
        if not (ch in "\t\r\n" or printable): # neither tab/endline nor printable
            return False # encountered non-string char
    return candidate # terminated without non-string chars
def find_all_strings():
    """
    Walks over every value in datap and collects those which point to a non-empty string.
    Returns list of tuples: (offset of the pointer, pointer value, the string it points to)
    """
    probed = ((offset, value, is_string_pointer(value)) for offset, value in enumerate(datap))
    # empty strings and False are both dropped here
    return [hit for hit in probed if hit[2]]
def find_pointers_to_offset(offset):
    """
    Looks up every place in datap whose value is the pointer to the given offset;
    returns list of offsets of those places
    """
    target = 0x08010000 + offset
    hits = []
    for position, value in enumerate(datap):
        if value == target:
            hits.append(position)
    return hits
def find_string_offsets(s):
    """ Collects offsets of all occurrences of the given string (followed by \\0) in data """
    needle = s + '\0' # string in file must end with \0 !
    offsets = []
    cursor = -1
    while True:
        cursor = data.find(needle, cursor + 1)
        if cursor == -1:
            return offsets
        offsets.append(cursor)
def parse_args():
    """
    Builds the command-line parser and returns the parsed arguments (argparse Namespace).

    Attributes of the result:
      tintin, output, strings - opened files (input firmware, output firmware, translations)
      old_format   - True for the old text format, False for gettext PO
      exclude      - list of reference IDs given with -x
      print_only, force, reuse_ranges - boolean switches
      ranges       - None, or a list with one item per -r / -R / -e option:
                     [start, end] integers for -r,
                     [start signature, end signature, size] for -R,
                     the string "append" for -e
    """
    import argparse
    import binascii

    def hexarg(x):
        """ Converts a -R value: a hex signature becomes raw bytes, anything else an integer """
        try:
            return binascii.unhexlify(x)
        except (TypeError, ValueError):
            # not an even-length run of hex digits (e.g. "0x243"): treat it as a number;
            # a ValueError from int() is reported by argparse as an invalid argument
            return int(x, 0)
        # NOTE: a size written with hex digits only (e.g. "32") is taken for a signature here

    parser = argparse.ArgumentParser(
        description="Translation helper for Pebble firmware",
        epilog="Strings format:\nOriginal String:=Translated String\n"+
        "Any newlines in strings must be replaced with '\\n', any backslashes with '\\\\'.\n"+
        "Lines starting with # are comments, so if you need # at line start replace it with \\#.\n"+
        "Lines starting with ! are those which may be translated 'in place' "+
        "(for strings which have free space after them).")
    parser.add_argument("tintin", nargs='?', default="tintin_fw.bin", type=argparse.FileType("rb"),
                        help="Input tintin_fw file, defaults to tintin_fw.bin")
    parser.add_argument("output", nargs='?', default=sys.stdout, type=argparse.FileType("wb"),
                        help="Output file, defaults to stdout")
    parser.add_argument("-s", "--strings", default=sys.stdin, type=argparse.FileType("r"),
                        help="File with strings to translate, by default will read from stdin")
    # -t and -g share one destination, so only one of them may be given
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-t", "--txt", dest="old_format", action="store_true",
                       help="Use old (custom, text-based) format for strings")
    group.add_argument("-g", "--gettext", "--po", dest="old_format", action="store_false",
                       help="Use gettext's PO format for strings (default)")
    parser.add_argument("-x", "--exclude", "--exclude-strings", action="append", metavar="REF", default=[],
                        help="Don't translate strings with given reference ID (only for PO files). "+
                        "This option may be passed several times.")
    parser.add_argument("-p", "--print-only", action="store_true",
                        help="Don't translate anything, just print out all referenced strings from input file")
    parser.add_argument("-f", "--force", action="store_true",
                        help="Disable safety checks for inplace translations")
    # -r, -R and -e all append to the same "ranges" list; items are told apart by their shape
    parser.add_argument("-r", "--range", action="append", nargs=2, metavar=("start","end"), type=lambda x: int(x,0),
                        dest="ranges",
                        help="Offset range to use for translated messages (in addition to space at the end of file). "+
                        "Use this to specify unneeded firmware parts, e.g. debugging console or disabled watchfaces. "+
                        "Values may be either 0xHex, Decimal or 0octal. This option may be repeated.")
    parser.add_argument("-R", "--range-mask", action="append", nargs=3, metavar=("start","end","size"),
                        type=hexarg, dest="ranges",
                        help="Ranges defined by signatures: START and END are hex signatures of first and last bytes "+
                        "of range. For example, -R 48656C6C6F 3132333435 0x243 will select range of 0x243 bytes "+
                        "starting with 'Hello' and ending with '12345'. "+
                        "You must always specify range size for checking.")
    parser.add_argument("-e", "--end", action="append_const", const="append", dest="ranges",
                        help="Use space between end of firmware and 0x08080000 (which seems to be the last address "+
                        "allowed) to store strings. Note that this will change size of firmware binary "+
                        "which may possible interfere with iOS Pebble app.")
    parser.add_argument("-u", "--reuse-ranges", action="store_true",
                        help="Reuse freed (fully moved on translation) strings as ranges for next strings. "+
                        "This may slow process as every character needs to be checked for possible pointers.")
    return parser.parse_args()
def read_strings_txt(f):
    """
    Reads translations in the old text format: one "Original String:=Translated String" per line.

    f is any iterable of lines (normally an opened text file).
    Returns tuple (strings, keys, inplace):
      strings - dict: original string -> translated string
      keys    - original strings in the order they were read
      inplace - original strings whose line started with '!' (may be translated in place)
    Empty lines and lines starting with # are skipped silently; bad, untranslated,
    ambiguous and duplicate lines are reported to log and skipped.
    """
    import re
    escapes = {'n': '\n', '#': '#', '\\': '\\'}

    def unescape(text):
        # single left-to-right pass: an escaped backslash followed by 'n' or '#'
        # must stay a backslash and must not turn into a newline / hash
        return re.sub(r'\\([n#\\])', lambda m: escapes[m.group(1)], text)

    strings = {}
    keys = []
    inplace = []
    for line in f:
        if line.endswith('\n'): # remove trailing \n - the last line of file may have none
            line = line[:-1]
        if len(line) == 0 or line.startswith('#'): # comment or empty
            continue
        line = unescape(line)
        if ':=' not in line:
            print >>log, "Warning: bad line in strings:", line
            continue
        left, right = line.split(':=', 1)
        if not right: # empty
            print >>log, "Warning: translation is empty; ignoring:", line
            continue
        if ':=' in right:
            print >>log, "Warning: ambigous line in strings:", line
            continue
        is_inplace = left.startswith('!') # inplace translating
        if is_inplace:
            left = left[1:]
        if left in strings:
            print >>log, "Warning: duplicate string, ignoring:", line
            print >>log, "Original: "+strings[left]
            continue
        if is_inplace: # only now, so that an ignored duplicate changes nothing
            inplace.append(left)
        strings[left] = right
        keys.append(left)
    return strings, keys, inplace
def read_strings_po(f, exclude=()):
    """
    Reads translations from a gettext PO file.

    f is any iterable of lines (normally an opened text file);
    exclude is a collection of reference IDs ("#:" lines) whose records must be skipped.
    Returns tuple (strings, keys, inplaces):
      strings  - dict: msgid -> msgstr, or -> list of msgstr's indexed by context number
                 (None for contexts without translation) if the record has a msgctxt
      keys     - msgid's in the order they were first read
      inplaces - msgid's flagged as "fuzzy" or having "inplace" in their msgctxt
    A record ends with an empty line or with the end of file.
    Untranslated records, records with msgstr equal to msgid and duplicates
    are reported to log and skipped.
    """
    # TODO : multiline strings w/o \n
    import re
    escapes = {'n': '\n', '"': '"', '\\': '\\'}

    def parsevalline(line, kwlen): # kwlen is keyword length
        value = line[kwlen:].strip() # remove keyword (e.g. 'msgid') and spaces
        if value.startswith('"'):
            if len(value) < 2 or not value.endswith('"'):
                print >>log, "Warning! Expected '\"' not found in line %s" % line
            value = value[1:-1] # remove quotes
        # unescape in a single pass, so that an escaped backslash
        # followed by 'n' or '"' is not mistaken for another escape
        return re.sub(r'\\([n"\\])', lambda m: escapes[m.group(1)], value)

    strings = {}
    keys = []
    inplaces = []

    def store_record(left, right, inplace, ref, context):
        """ Files one finished record; returns 1 if it was excluded by its reference, else 0 """
        if ref in exclude:
            #print >>log, "Line %s has ref <%s> which is requested to be excluded; skipping" % (repr(left), ref)
            return 1
        if not left: # no msgid (or PO header with empty msgid) -> ignoring
            return 0
        if not right: # only left provided -> line untranslated, ignoring
            print >>log, "Ignoring untranslated line %s" % left
            return 0
        # FIXME: support inplace for contexted lines? do we need this at all?
        if left == right:
            print >>log, "Translation = original, ignoring line %s" % left
            return 0
        if left not in strings: # first record with this msgid
            keys.append(left)
            if context is not None:
                slots = [None] * (max(context)+1)
                for c in context:
                    slots[c] = right
                strings[left] = slots
            else:
                strings[left] = right
            if inplace:
                inplaces.append(left)
            return 0
        if not (context or isinstance(strings[left], list)): # plain duplicate
            print >>log, "Warning: ignoring duplicate line %s" % left
            return 0
        # this or previous record is contexted
        if not isinstance(strings[left], list):
            strings[left] = [strings[left]] # convert to list
            # because POEditor omits lines with msgctxt=0
        if context is None:
            context = [0] # for the same reason as above
        slots = strings[left]
        for c in context:
            if len(slots) <= c:
                # pad with Nones so that the appended value lands exactly at index c
                slots.extend([None] * (c - len(slots)))
                slots.append(right)
            elif slots[c]: # have such item already
                print >>log, "Warning: duplicate contexted line %s @ %d" % (left, c)
            else:
                slots[c] = right
        return 0

    # our scratchpad
    left = None
    right = None
    inplace = False
    ref = None
    context = None
    skipnum = 0 # number of excluded lines
    for line in f:
        if line.endswith('\n'): # remove trailing \n - the last line of file may have none
            line = line[:-1]
        if len(line) == 0: # end of record
            skipnum += store_record(left, right, inplace, ref, context)
            # now clear scratchpad
            left = None
            right = None
            inplace = False
            ref = None
            context = None
        elif line.startswith("#,"): # flags
            flags = [x.strip() for x in line[2:].split(",")] # parse flags, removing leading "#,"
            if "fuzzy" in flags:
                inplace = True
            # ignore all other flags, if any
        elif line.startswith("#:"): # reference
            ref = line[2:].strip()
        elif line.startswith("#"): # comment, etc
            pass # ignore
        elif line.startswith("msgid"):
            left = parsevalline(line, 5)
        elif line.startswith("msgstr"):
            right = parsevalline(line, 6)
        elif line.startswith("msgctxt"):
            context = []
            for num in parsevalline(line, 7).split(','):
                # inplace flag, to replace "fuzzy" usage
                if num.lower() == 'inplace':
                    inplace = True
                    continue
                # should be a number
                try:
                    context.append(int(num))
                except ValueError:
                    print >>log, ("*** ERROR: %s is not an integer "
                                  "or comma-separated list of integers "
                                  "and not a supported flag (line %s)") % (num, line)
            if not context: # only inplace flag
                context = None
        elif line.startswith('"'): # continuation?
            if right is not None:
                right += parsevalline(line, 0)
            elif left is not None:
                left += parsevalline(line, 0)
            else:
                print >>log, "Warning: unexpected continuation line: %s" % line
        else:
            print >>log, "Warning: unexpected line in input: %s" % line
    # the file may end right after the last record, without an empty line
    skipnum += store_record(left, right, inplace, ref, context)
    if skipnum:
        print >>log, "Excluded %d lines as requested" % skipnum
    return strings, keys, inplaces
def translate_fw(args):
    """
    Performs the whole translation run described by args (result of parse_args).

    Steps, in order:
      1. reads the firmware from args.tintin into data/datar and fills datap
         with the 4-byte value found at every byte offset;
      2. turns args.ranges into a list of free [start, end) offset ranges;
      3. with args.print_only: writes every referenced string to args.output and exits;
      4. reads translations (old text format or PO, depending on args.old_format);
      5. for every original string: overwrites it in place when the translation fits
         (or is marked inplace), otherwise stores the translation in a free range
         and rewrites the pointers which referenced the original;
      6. with args.reuse_ranges: repeats step 5 while passes keep freeing space;
      7. writes the patched firmware to args.output and closes it.
    All messages go to log. Nothing is returned.
    """
    global data, datap, datar, log
    if args.output == log == sys.stdout:
        log = sys.stderr # if writing new tintin to stdout, print all messages to stderr to avoid cluttering
    # load source fw:
    data = args.tintin.read()
    datar = data # start from just copy, later will change it
    # convert to pointers:
    for i in range(0, len(data)-3): # every offset is read as a 4-byte int; -3 to avoid last (partial) value.
                                    # Not-aligned values are included too
        n = unpack("I", data[i:i+4])[0]
        datap.append(n)

    # free space available for translated strings: list of [start, end] offsets, end exclusive
    ranges = []
    def addrange(start, end):
        """ Check range for clashes and then add it to ranges list """
        for r in list(ranges): # iterate over a copy as singular ranges are removed on the way
            if r[0] == r[1]: # singular range
                ranges.remove(r) # remove as it is unneeded
                continue # to next range
            if start == r[0] and end == r[1]: # duplicate
                print >>log, "### Duplicate range %x-%x, skipping." % (start, end)
                return
            if start >= r[0] and end <= r[1]: # fully inside; ignore
                print >>log, "### Range clash!! This must be an error! Range %x-%x fits within %x-%x; ignoring" % (
                    start, end, r[0], r[1])
                return
            if start <= r[0] and end >= r[1]:
                # fully outside; replace. FIXME : this might introduce clashes with other ranges
                print >>log, "### Range clash!! This must be an error! Range %x-%x contained in %x-%x; replacing" % (
                    start, end, r[0], r[1])
                r[0] = start
                r[1] = end
                return
            if start <= r[0] and end > r[0]: # clash with beginning; truncate
                print >>log, "### Range clash!! This must be an error! Range %x-%x clashes with %x-%x; truncating" % (
                    start, end, r[0], r[1])
                end = r[0]
            if start < r[1] and end >= r[1]: # clash with end; truncate
                print >>log, "### Range clash!! This must be an error! Range %x-%x clashes with %x-%x; truncating" % (
                    start, end, r[0], r[1])
                start = r[1]
        for r in ranges: # another loop for neighbours - now when we surely have no clashes
            if r[1] == start:
                print >>log, " # Range neighbourhood, merging %x-%x to %x-%x" % (
                    start, end, r[0], r[1])
                r[1] = end
                return
            if end == r[0]:
                print >>log, " # Range neighbourhood, merging %x-%x to %x-%x" % (
                    start, end, r[0], r[1])
                r[0] = start
                return
        ranges.append([start, end])

    # args.ranges mixes three kinds of items, told apart by their shape:
    # 3 values (-R signatures + size), 2 values (-r offsets) or the string "append" (-e)
    for r in args.ranges or []:
        if len(r) == 3: # signature-specified range - convert it to offsets
            if type(r[0]) != str or type(r[1]) != str or type(r[2]) != int:
                print >>log, "-Warning: invalid range mask specification %s; ignoring" % repr(r)
                continue
            start = data.find(r[0])
            if start < 0:
                print >>log, "-Warning: starting mask %s not found, ignoring this range" % repr(r[0])
                continue
            end = start+data[start:].find(r[1]) # find() gives -1 when not found, which makes end < start
            if end < start:
                print >>log, "-Warning: start at 0x%X, ending mask %s not found, ignoring this range" % (start, repr(r[1]))
                continue
            length = end + len(r[1]) - start
            if length != r[2]:
                print >>log, ("-Warning: length mismatch for range %s..%s (0x%X..0x%X), expected %d, found %d; "+
                              "ignoring this range") % (repr(r[0]), repr(r[1]), start, end, r[2], length)
                continue
            end += len(r[1]) # append ending mask size
            addrange(start, end)
        elif len(r) == 2:
            addrange(r[0], r[1])
        elif r == "append":
            start = len(datar)
            end = EOF
            if start < end:
                addrange(start, end)
            else:
                print >>log, "Warning: cannot append to end of file because its size is >= 0x70000 (max fw size)"
        else:
            print >>log, "?!? confused: unexpected range", r
    if ranges:
        print >>log, "Using following ranges:"
        for r in ranges:
            print >>log, " * 0x%X..0x%X (%d bytes)" % (r[0], r[1], r[1]-r[0])
    elif len(ranges) == 0:
        print >>log, "WARNING: no usable ranges!"

    if args.print_only:
        # only dump the referenced strings, one per line, and leave
        print >>log, "Scanning tintin_fw..."
        ptrs = find_all_strings()
        print >>log, "Found %d referenced strings" % len(ptrs)
        for p in ptrs:
            args.output.write(p[2]+'\n')
        args.output.close()
        sys.exit(0)

    if args.old_format:
        strings, keys, inplace = read_strings_txt(args.strings)
    else:
        strings, keys, inplace = read_strings_po(args.strings, args.exclude)
    print >>log, "Got %d valid strings to translate" % len(strings)
    if not strings:
        print >>log, "NOTICE: No strings, nothing to do! Will just duplicate fw"

    npass = 0
    # each iteration is one pass over the strings still left in keys
    while True:
        untranslated = 0 # number of strings we could not translate because of range lack
        translated = 0 # number of strings translated in this pass
        for key in list(keys): # use clone to avoid breaking on removal
            val = strings[key] # string or list
            vals = val if type(val) is list else [val] # for a list this is the same object as val
            print >>log, "Processing", repr(key)
            os = find_string_offsets(key) # offsets of occurrences (note: local name, not the os module)
            if not os: # no such string
                print >>log, " -- not found, ignoring"
                continue
            if type(val) is list: # contexted
                if len(os) < len(val):
                    print >>log, " ** Warning: too many contexts given for %s" % key
                elif len(os) > len(val):
                    #print >>log, " ** Warning: too few contexts given for %s" % key
                    # not all contexts may need to be translated
                    val += [None] * (len(os) - len(val)) # pad it with Nones to avoid Index out of bounds
            mustrepoint=[] # list of "inplace" key occurrences which cannot be replaced inplace
            if (type(val) is not list # val is not contexted
                    and (len(val) <= len(key) or key in inplace)): # can just replace
                # but will not replace contexted vals
                print >>log, " -- found %d occurance(s), replacing" % len(os)
                for idx, o in enumerate(os):
                    doreplace = True
                    print >>log, " -- 0x%X:" % o,
                    if key in inplace and len(val) > len(key) and not args.force: # check that "rest" has only \0's
                        # NOTE(review): only the bytes up to offset o+32 are inspected,
                        # whatever the length of val is - confirm this window is intended
                        rest = datar[o+len(key):o+32]
                        for i in range(len(rest)):
                            if rest[i] != '\0':
                                print >>log, " ** SKIPPING because overwriting is unsafe here; use -f to override. "+\
                                    "Will try to rewrite pointers."
                                mustrepoint.append(o)
                                doreplace = False # don't replace this occurrence
                                break # break inner loop
                    # Now check for optimized links:
                    # Hello_World\0
                    # ^p1   ^p2
                    # - here we cannot just translate inplace "Hello_World" to
                    # "Bonjour" as it would result in the following:
                    # Bonjour\0ld\0
                    # ^p1   ^p2
                    # NOTE(review): this check runs even if the check above already rejected o,
                    # so o may get into mustrepoint twice - verify
                    for i in range(o+1, # there definitely is a pointer to o
                                   o+min(len(key)+1,len(val)+1)): # use min because we don't need to worry about the rest
                        if find_pointers_to_offset(i):
                            print >>log, " ** SKIPPING "+\
                                "because there are links to the rest of the string due to optimization; "+\
                                "will try to rewrite pointers."
                            mustrepoint.append(o)
                            doreplace = False
                            break
                    if not doreplace:
                        continue # skip to next occurrence, this will be handled later
                    oldlen = len(datar)
                    # overwrite len(val)+1 bytes at o with the translation and its terminator
                    datar = datar[0:o] + val + '\0' + datar[o+len(val)+1:]
                    if len(datar) != oldlen:
                        raise AssertionError("Length mismatch")
                    print >>log, "OK" # this occurrence replaced successfully
                if not mustrepoint:
                    keys.remove(key) # this string is translated
                    translated += 1
                    continue # everything replaced fine for that key
            # we are here means that new string is longer than old (and not an
            # inplace one - or at least has one non-inplace-possible occurrence)
            # so will add it to end of tintin file or to ranges
            print >>log, " -- %s %d occurance(s), looking for pointers" % ("still have" if mustrepoint else "found", len(mustrepoint or os))
            ps = [] # offsets of all pointers to the occurrences being moved
            for o in list(mustrepoint) or list(os): # use mustrepoint if it is not empty
                newps = find_pointers_to_offset(o)
                ps.extend(newps)
                if not newps:
                    print >>log, " !? String at 0x%X is unreferenced, will ignore! (must be partial or something)" % o
                    # and remove it from list (needed for reuse_ranges)
                    if mustrepoint:
                        mustrepoint.remove(o)
                    else:
                        os.remove(o)
            if not ps:
                print >>log, " !! No pointers to that string, cannot translate!"
                continue
            print >>log, " == found %d ptrs; appending or inserting string and updating them" % len(ps)
            stored = {} # translation -> packed pointer to where it was already written
            key_translated = True
            for idx, v in enumerate(vals): # for each contexted value (or for the only value)
                if v == None:
                    continue # skip empty ones
                if idx >= len(ps):
                    print >>log, " *! Warning: no pointers for given context %d" % idx
                    continue
                if v in stored: # such string was already stored
                    newps = stored[v]
                    print >>log, " -- using stored ptr"
                else:
                    r = None # range to use
                    for rx in sorted(ranges, key=lambda r: r[1]-r[0]): # smallest ranges first
                        if rx[1]-rx[0] >= len(v)+1: # this range have enough space
                            r = rx
                            break # break inner loop (on ranges)
                    if not r: # suitable range not found
                        print >>log, " ## Notice: no (more) ranges available large enough for this phrase. Will skip it."
                        untranslated += 1
                        key_translated = False
                        continue # to next value variant
                    print >>log, " -- using range 0x%X-0x%X%s" % (r[0],r[1]," (end of file)" if r[1] == EOF else "")
                    newp = r[0]
                    oldlen = len(datar)
                    datar = datar[0:newp] + v + '\0' + datar[newp+len(v)+1:]
                    if len(datar) != oldlen and r[1] != EOF: # the range ending at EOF is the one at the end of file, it may grow datar
                        raise AssertionError("Length mismatch")
                    r[0] += len(v) + 1 # remove used space from that range
                    newp += 0x08010000 # convert from offset to pointer
                    newps = pack('I', newp)
                    stored[v] = newps
                for pidx, p in enumerate(ps): # now update pointers
                    if len(vals) > 1: # if contexted
                        if pidx >= len(vals):
                            print >>log, " *! Warning: exceeding pointer %d for context %d" % (pidx, idx)
                        if idx != pidx:
                            continue # skip irrelevant pointers
                    oldlen = len(datar)
                    datar = datar[0:p] + newps + datar[p+4:] # replace the 4-byte pointer at p
                    if len(datar) != oldlen:
                        raise AssertionError("Length mismatch")
            if key_translated and key in keys:
                keys.remove(key) # as it is translated now
                translated += 1
            # now that string is translated, we may reuse its place as ranges
            if key_translated and args.reuse_ranges:
                for o in mustrepoint or os:
                    i = o+1
                    while i < len(data):
                        if find_pointers_to_offset(i): # string is overused starting from this point
                            break
                        if data[i] == '\0' : # last byte
                            i += 1 # include it too
                            break
                        i += 1
                    addrange(o, i)
                    print >>log, " ++ Reclaimed %d bytes from this string" % (i-o)
        npass += 1
        print >>log, "Pass %d completed." % npass
        sizes = [r[1]-r[0] for r in ranges]
        print >>log, "Remaining space at this point: %d bytes scattered in %d ranges, max range size is %d bytes" % \
            (sum(sizes), len(ranges), max(sizes or [0]))
        print >>log
        if not args.reuse_ranges: # new ranges definitely could not appear
            break
        if len(keys) == 0:
            print >>log, "All strings are translated. Enjoy!"
            break
        if untranslated == 0:
            print >>log, "No more exceeding strings. Nice."
            break
        if translated == 0:
            print >>log, "Nothing changed in this pass; giving up."
            break
        print >>log, "Translated %d strings in this pass; let's try to translate %d remaining" % (translated, untranslated)
        untranslated = 0 # restart counter as we will retry all these strings
    if keys:
        print >>log, "Strings still not translated:"
        print >>log, '\n'.join(["* "+k for k in keys])
    else:
        print >>log, "Everything translated. Hooray!"
    print >>log, "Saving..."
    if len(datar) != len(data): # something appended
        datar += data[-48:] # add ending bytes - needed for iOS app
    args.output.write(datar)
    args.output.close()
    print >>log, "Done."
    if untranslated:
        print >>log, "WARNING: Couldn't translate %d strings because of ranges lack." % untranslated
    else:
        print >>log, "I think that all the strings were translated successfully :-)"
# Script entry point: parse the command line and run the translation
if __name__ == "__main__":
    args = parse_args()
    translate_fw(args)