-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_elf.py
executable file
·358 lines (310 loc) · 13 KB
/
plot_elf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
#!/usr/bin/env python3
# https://github.com/eliben/pyelftools/wiki/User's-guide
# https://man7.org/linux/man-pages/man5/elf.5.html
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.lines import Line2D
import sys
import io
import os.path
import re
import hashlib
from elftools.common.exceptions import ELFError
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment
import typing
from typing import List, Dict, Tuple, Any, Collection, Set
# Workaround because numpy and the typing system don't work very well together
# at the moment: every numpy array in this file is annotated with this alias,
# which is simply typing.Any.
NpArray = Any
class PlottingOptions:
    """
    Plotting settings that apply to a single input file.
    """
    # Names (or "/regex/" selectors) of the parts that should be kept; every
    # instance gets its own empty set in __init__.
    selected_parts: Set[str]
    # Whether bytes outside the selected parts are removed before plotting.
    strip: bool = False

    def __init__(self) -> None:
        # Create the set per instance so that instances never share it.
        self.selected_parts = set()
class ElfFileData:
    """
    Everything that is kept in memory for one loaded ELF file.
    """
    # The file contents as a numpy array; None until it has been assigned.
    byte_data: NpArray = None
    # The parsed ELFFile object for the same file; None until assigned.
    elf_data: ELFFile = None
def error(msg: str) -> None:
    """
    Print an error message to stderr and quit with exit code 1.

    :param msg: The message to print; it is prefixed with "Error: ".
    :raises SystemExit: Always, with exit code 1.
    """
    # Diagnostics go to stderr so they never mix with regular program output
    # (e.g. when stdout is piped or redirected).
    print("Error: {}".format(msg), file=sys.stderr)
    sys.exit(1)
def load_elf_files(filenames: Collection[str]) -> Dict[str,ElfFileData]:
    """
    Read every given file and parse it as ELF.

    :param filenames: The names of the files to load.
    :return: A dict from filename to ElfFileData objects holding the raw bytes
             (as a uint8 numpy array) and the parsed ELF data.
    """
    loaded: Dict[str,ElfFileData] = {}
    for path in filenames:
        with open(path, "rb") as handle:
            contents = handle.read()
        entry = ElfFileData()
        loaded[path] = entry
        try:
            entry.byte_data = np.frombuffer(contents, dtype="uint8")
            # The ELF parser wants a stream, so wrap the bytes we already read.
            entry.elf_data = ELFFile(io.BytesIO(contents))
        except ELFError:
            error("Not a valid ELF file: \"{}\"".format(path))
    return loaded
def get_max_length(arrays: Collection[Collection]) -> int:
    """
    Get the maximum length of the supplied collections.

    :param arrays: The collections to analyze.
    :return: The length of the longest collection, or 0 if none were given.
    """
    # `default` covers the empty case, where a plain max() would raise.
    return max((len(a) for a in arrays), default=0)
def pad_array(array: NpArray, length: int) -> NpArray:
    """
    Pad the given array to the given length and fill the rest with (255,0,255).

    :param array: The array to pad; its rows must be RGB triples, i.e. its
                  shape must be (n, 3).
    :param length: The number of rows of the padded array.
    :return: A new array of shape (length, 3) with the same dtype as `array`.
    :raises ValueError: If `array` has more than `length` rows.
    """
    if len(array) > length:
        raise ValueError(
            "Cannot pad an array of length {} to length {}".format(len(array), length))
    # Allocate with the input dtype and assign the fill color afterwards.
    # Multiplying an array by the color tuple would promote the result to the
    # platform's default integer type and silently drop the input dtype.
    result: NpArray = np.empty((length, 3), dtype=array.dtype)
    result[:] = (255, 0, 255)
    result[:len(array)] = array
    return result
def filter_parts(parts: List[Tuple[str,int,int]], selected_parts: Collection[str]) -> List[Tuple[str,int,int]]:
    """
    Filter the given parts with the filter list supplied.

    :param parts: The input list of (name, offset, length) tuples.
    :param selected_parts: Strings that are either equal to the name of a part
                           (header / section) to keep, or a regex enclosed in
                           slashes ("/regex/"). A regex is matched against the
                           start of the part name (re.match semantics).
    :return: The filtered list, in the order of `parts`. Every part appears at
             most once, even if several selectors match it. If
             `selected_parts` is empty, `parts` is returned unchanged.
    :raises re.error: If a "/regex/" selector is not a valid regular expression.
    """
    if not selected_parts:
        return parts
    # Split the selectors once instead of re-inspecting them for every part.
    exact_names = set()
    patterns = []
    for selector in selected_parts:
        # A lone "/" both starts and ends with a slash but encloses nothing;
        # treat it as a literal name rather than as an empty regex that would
        # match every part.
        if len(selector) >= 2 and selector.startswith("/") and selector.endswith("/"):
            patterns.append(re.compile(selector[1:-1]))
        else:
            exact_names.add(selector)
    return [
        part for part in parts
        if part[0] in exact_names or any(p.match(part[0]) for p in patterns)
    ]
def get_parts(elf_files: Dict[str,ElfFileData], options: Dict[str,PlottingOptions]) -> Dict[str, List[Tuple[str, int, int]]]:
    """
    Get a list of parts for all input files.

    A part is a (name, offset, length) tuple describing a byte range of the
    file: the ELF header ("Ehdr"), every section, the program header table
    ("Phdr") and the section header table ("Shdr"). Parts that occupy no bytes
    in the file are dropped.

    :param elf_files: A dict from filename to ElfFileData objects that contains
                      the ELF and byte data.
    :param options: A dict from filename to PlottingOptions objects which
                    determine selected sections and stripping.
    :return: A dict from filename to lists of parts. Exits via error() if the
             selection of a file matches none of its parts.
    """
    result = dict()
    for f in elf_files:
        elf = elf_files[f].elf_data
        selected_parts: Set[str] = options[f].selected_parts
        header = elf.header
        parts = [("Ehdr", 0, header.e_ehsize)]
        for section in elf.iter_sections():
            # Take the position from the section header itself. Sections are
            # not guaranteed to follow the ELF header back to back or to be
            # stored in header-table order, so accumulating sizes would
            # mislabel bytes.
            if section["sh_type"] == "SHT_NOBITS":
                # e.g. .bss: has a size in memory but occupies no file bytes.
                size = 0
            else:
                # sh_size is the number of bytes stored in the file.
                size = section["sh_size"]
            parts.append((section.name, section["sh_offset"], size))
        # The header tables consist of one fixed-size entry per segment /
        # section, so the table length is entry size times entry count.
        parts.append(("Phdr", header.e_phoff, header.e_phentsize * elf.num_segments()))
        parts.append(("Shdr", header.e_shoff, header.e_shentsize * elf.num_sections()))
        parts = [(name, offset, length) for (name, offset, length) in parts if length > 0]
        parts = filter_parts(parts, selected_parts)
        if not parts:
            error("No parts in \"{}\" match selection {}".format(f,selected_parts))
        result[f] = parts
    return result
# Number of entries in the color lookup table.
num_colors = 3600
# Sample the "rainbow" colormap at evenly spaced positions and keep only the
# first three columns of every entry (the RGB part).
colors: NpArray = cm.rainbow(np.linspace(0, 1, num_colors))[:, :3]
# Colors that were already handed out, keyed by the text they were picked for.
saved_colors: Dict[str,NpArray] = {}
def get_color(text: str) -> NpArray:
    """
    Map a text to a color; the same text always yields the same color.

    :param text: The input text.
    :return: The color as an RGB value taken from the color table.
    """
    try:
        return saved_colors[text]
    except KeyError:
        # First request for this text: derive a table index from its MD5
        # digest and remember the choice.
        digest = hashlib.md5(text.encode()).hexdigest()
        picked = colors[int(digest, 16) % num_colors]
        saved_colors[text] = picked
        return picked
def colorize_data(elf_files: Dict[str,ElfFileData], parts: Dict[str, List[Tuple[str, int, int]]]) -> Dict[str,List[Tuple[str,Line2D]]]:
    """
    Turn the bytes of every file into RGB values and tint each part (ELF
    header, program header, section header and sections such as .text or
    .data) with the color that belongs to its name. The byte data of every
    file is replaced by the colorized version, and the legend entries for the
    plot are collected.

    :param elf_files: A dict from filename to ElfFileData objects that contains
                      the ELF and byte data.
    :param parts: A dict from filename to parts to keep.
    :return: A dict from filename to a list of (part name, Line2D) pairs, where
             the Line2D object carries the color used for that part.
    """
    legends: Dict[str,List[Tuple[str,Line2D]]] = {}
    for filename, entry in elf_files.items():
        gray: NpArray = entry.byte_data
        # Repeat every byte three times so that it becomes a gray RGB triple.
        rgb = np.stack((gray, gray, gray), axis=1)
        handles = []
        for name, offset, length in parts[filename]:
            tint = get_color(name)
            span = slice(offset, offset + length)
            # Scale the gray value by the part's color and go back to bytes.
            rgb[span] = (rgb[span] * tint).astype("uint8")
            handles.append((name, Line2D([0], [0], color=tint, lw=4)))
        legends[filename] = handles
        entry.byte_data = rgb
    return legends
def strip_data(elf_files: Dict[str,ElfFileData], parts: Dict[str, List[Tuple[str, int, int]]], options: Dict[str,PlottingOptions]) -> None:
    """
    If specified in options, strip away all the parts we don't want to highlight.

    For every file whose options request stripping, the byte data is replaced
    by the concatenation of the byte ranges of its parts, in the order of the
    parts list. Files without stripping are left untouched.

    :param elf_files: A dict from filename to ElfFileData objects that contains
                      the ELF and byte data (rows of RGB triples).
    :param parts: A dict from filename to parts to keep.
    :param options: A dict from filename to PlottingOptions objects which
                    determine selected sections and stripping.
    """
    for f, entry in elf_files.items():
        if not options[f].strip:
            continue
        data: NpArray = entry.byte_data
        chunks = [data[offset : offset+length] for (_name, offset, length) in parts[f]]
        # The leading empty array keeps the result well-formed (shape (0,3))
        # when there are no parts. Concatenating once avoids re-copying the
        # accumulated data for every part.
        entry.byte_data = np.concatenate([np.empty((0,3), dtype="uint8"), *chunks], axis=0)
def reshape_data(elf_files: Dict[str,ElfFileData], preferred_width=None) -> None:
    """
    Reshape the byte data into rectangular arrays so that we can view them as
    images.

    One-dimensional data of length n becomes an array of shape (h, w);
    two-dimensional data of shape (n, channels) becomes (h, w, channels).
    Trailing elements that do not fill a complete row are dropped.

    :param elf_files: A dict from filename to ElfFileData objects that contains
                      the ELF and byte data.
    :param preferred_width: If specified, we will use this width for all images,
                            otherwise we'll try to pick a good width
                            automatically (a multiple of 8, at least 8). In both
                            cases the width is limited to the length of the data.
    :raises ValueError: If `preferred_width` is given but smaller than 1.
    """
    if preferred_width is not None and preferred_width < 1:
        raise ValueError("preferred_width must be at least 1, got {}".format(preferred_width))
    for f in elf_files:
        data = elf_files[f].byte_data
        dimensions = data.ndim
        assert dimensions in [1,2]
        length = data.shape[0]
        if preferred_width is None:
            sqrt_len = int(math.sqrt(length))
            # Aim for an image that is taller than wide (ratio about sqrt(2))
            # and round the width down to a multiple of 8.
            w = max(int(sqrt_len / math.sqrt(2) / 8) * 8, 8)
        else:
            w = preferred_width
        # A row can never be wider than the data itself; without this limit,
        # data shorter than `w` could not be reshaped at all.
        w = max(min(w, length), 1)
        h = length // w
        data = data[:(w*h)]
        # Keep any trailing axes (the color channels) unchanged.
        elf_files[f].byte_data = data.reshape((h, w) + data.shape[1:])
def _tick_positions(size: int, divisions: int) -> NpArray:
    """
    Compute tick positions for an image axis of the given size: multiples of a
    stride that is itself a multiple of 8, followed by the size itself.

    :param size: The extent of the axis in pixels.
    :param divisions: Roughly how many strides should fit onto the axis.
    :return: The tick positions.
    """
    stride = int(size / divisions / 8) * 8
    if stride == 0:
        # The axis is too short for a stride of 8; mark only its two ends
        # instead of stacking many ticks at position 0.
        return np.array([0, size])
    ticks = np.arange(20) * stride
    # Leave out ticks that would crowd the final tick at `size`.
    return np.append(ticks[ticks < size - stride], size)


def _compiler_comment(elf) -> str:
    """
    Read the text of the .comment section.

    :param elf: The parsed ELF file.
    :return: The decoded section contents without leading/trailing NUL bytes,
             or "unknown" if the file has no .comment section.
    """
    section = elf.get_section_by_name(".comment")
    if section is None:
        return "unknown"
    # Undecodable bytes must not abort plotting just because of the title.
    return section.data().decode("utf-8", errors="replace").strip('\x00')


def plot_elf_files(elf_files: Dict[str,ElfFileData], legend_data: Dict[str,List[Tuple[str,Line2D]]]) -> None:
    """
    Plot the given ELF files side by side, one subplot per file, and show the
    figure.

    :param elf_files: A dict from filename to ElfFileData objects that contains
                      the ELF and byte data (already reshaped into images).
    :param legend_data: A dict from filename to the names of the parts of the
                        data and to the Line2D objects containing the colors of
                        the parts of the data.
    """
    num_plots = len(elf_files)
    fig, ax = plt.subplots(ncols=num_plots)
    for i, f in enumerate(elf_files):
        colorized_bytes = elf_files[f].byte_data
        # With a single subplot, `ax` is the axes object itself, not an array.
        cax = ax[i] if num_plots > 1 else ax
        cax.imshow(colorized_bytes)
        cax.set_title("{}\n[{}]".format(f, _compiler_comment(elf_files[f].elf_data)))
        cax.legend([x[1] for x in legend_data[f]], [x[0] for x in legend_data[f]], loc=(1.04,0))
        h, w = np.shape(colorized_bytes)[0:2]
        cax.set_xticks(_tick_positions(w, 5))
        cax.set_yticks(_tick_positions(h, 10))
    # Maximizing is only offered by some GUI backends (Qt windows provide
    # showMaximized); skip it where it is not available.
    window = getattr(fig.canvas.manager, "window", None)
    maximize = getattr(window, "showMaximized", None)
    if callable(maximize):
        maximize()
    plt.show()
def parse_args() -> Dict[str,PlottingOptions]:
    """
    Parse the command line arguments (sys.argv).

    Every argument is either the name of an existing file or a selector:
    "+name" selects a part, "++name" selects a part and additionally enables
    stripping. A selector applies to the file named most recently before it;
    selectors given before the first file apply to all files.

    :return: A dict from filename to PlottingOptions objects which
             determine selected sections and stripping. Exits via error() on
             invalid arguments or if no file was given.
    """
    args: List[str] = sys.argv[1:]
    if not args:
        error("No filenames given.")
    result: Dict[str,PlottingOptions] = {}
    current = None  # options of the most recently named file, if any
    global_strip: bool = False
    global_selected_parts: Set[str] = set()
    for arg in args:
        if arg.startswith("+"):
            strip = arg.startswith("++")
            selector = arg[2:] if strip else arg[1:]
            if not selector:
                # A bare "+" / "++" names no part and could never match.
                error("Could not parse argument \"{}\"".format(arg))
            if current is None:
                global_selected_parts.add(selector)
                global_strip |= strip
            else:
                current.selected_parts.add(selector)
                current.strip |= strip
        elif os.path.isfile(arg):
            if arg in result:
                error("Specified filename twice: \"{}\"".format(arg))
            current = PlottingOptions()
            result[arg] = current
        else:
            error("Not a valid file: \"{}\"".format(arg))
    if not result:
        # Only selectors were given; without a file there is nothing to plot.
        error("No filenames given.")
    if global_selected_parts:
        for file_options in result.values():
            file_options.selected_parts.update(global_selected_parts)
            file_options.strip |= global_strip
    return result
def main() -> None:
    """
    Run the whole pipeline: parse the arguments, load the files, determine and
    colorize their parts, optionally strip them, bring all files to the same
    size and plot them.
    """
    options = parse_args()
    # Read the raw bytes and the ELF structure of every requested file.
    elf_files = load_elf_files(options.keys())
    # Work out which parts each file has and apply the user's selection.
    parts = get_parts(elf_files, options)
    # Give the ELF header, .text, .data, ... their own colors.
    legend_data = colorize_data(elf_files, parts)
    # Drop the bytes the user is not interested in, where requested.
    strip_data(elf_files, parts, options)
    # Pad every file to the length of the longest one.
    longest = get_max_length([entry.byte_data for entry in elf_files.values()])
    for entry in elf_files.values():
        entry.byte_data = pad_array(entry.byte_data, longest)
    reshape_data(elf_files)
    # Finally show the images.
    plot_elf_files(elf_files, legend_data)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()