4
4
from statistics import mean
5
5
6
6
7
# dot-folder located in the home directory of the current user ("~" is expanded at import time)
CACHE_DIR = os.path.expanduser("~/.exeplot")
7
8
# https://matplotlib.org/2.0.2/examples/color/named_colors.html
8
9
COLORS = {
9
10
None : ["salmon" , "gold" , "plum" , "darkkhaki" , "orchid" , "sandybrown" , "purple" , "khaki" , "peru" , "thistle" ],
11
+ 'header' : "black" ,
10
12
'headers' : "black" ,
11
13
'overlay' : "lightgray" ,
14
+ 'section header' : "black" ,
15
+ 'section headers' : "black" ,
12
16
'<undef>' : "lightgray" ,
13
17
# common
14
18
'text' : "darkseagreen" , # code
41
45
# Minimum number of samples given to a section on the entropy plot, so that it can still be visible even if it is far
#  smaller than the other sections.
MIN_ZONE_WIDTH = 3
# NOTE(review): presumably the number of samples taken over a binary for the entropy plot -- confirm against its users
N_SAMPLES = 2048
# NOTE(review): looks like shadow drawing parameters (shade, x/y offsets, line width) for a plotting call -- confirm
SHADOW = dict(shade=0.3, ox=0.005, oy=-0.005, linewidth=0.0)
44
49
# Sub-label builders: each key selects a callable that takes the data dictionary "d" of a binary and returns the text
#  to be displayed. Keys read from "d":
#  - 'size':     passed to _human_readable_size
#  - 'ep':       indexed at [1] (formatted as hexadecimal) and [2] (formatted as a string) ; the 'ep' builder feeds
#                 d['ep'][1:] directly to a two-field format string, so the value has to be a 3-tuple (not a list)
#  - 'entropy':  iterable averaged with statistics.mean and scaled by 8
#  - 'entropy*': number displayed as the overall entropy
SUBLABELS = {
    'ep': lambda d: "EP at 0x%.8x in %s" % d['ep'][1:],
    'size': lambda d: "Size = %s" % _human_readable_size(d['size'], 1),
    'size-ep': lambda d: "Size = %s\nEP at 0x%.8x in %s" %
                         (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2]),
    'size-ent': lambda d: "Size = %s\nAverage entropy: %.2f\nOverall entropy: %.2f" %
                          (_human_readable_size(d['size'], 1), mean(d['entropy']) * 8, d['entropy*']),
    'size-ep-ent': lambda d: "Size = %s\nEP at 0x%.8x in %s\nAverage entropy: %.2f\nOverall entropy: %.2f" %
                             (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2], mean(d['entropy']) * 8,
                              d['entropy*']),
}
53
60
54
61
@@ -65,7 +72,7 @@ def _ensure_str(s, encoding='utf-8', errors='strict'):
65
72
66
73
def _human_readable_size (size , precision = 0 ):
67
74
i , units = 0 , ["B" , "KB" , "MB" , "GB" , "TB" , "PB" , "EB" , "ZB" , "YB" ]
68
- while size >= 1024 and i < len (units ):
75
+ while size >= 1024 and i < len (units )- 1 :
69
76
i += 1
70
77
size /= 1024.0
71
78
return "%.*f%s" % (precision , size , units [i ])
@@ -74,7 +81,7 @@ def _human_readable_size(size, precision=0):
74
81
class Binary :
75
82
def __init__ (self , path , ** kwargs ):
76
83
from lief import logging , parse
77
- self .path = str (path )
84
+ self .path = os . path . abspath ( str (path ) )
78
85
self .basename = os .path .basename (self .path )
79
86
self .stem = os .path .splitext (os .path .basename (self .path ))[0 ]
80
87
l = kwargs .get ('logger' )
@@ -97,20 +104,132 @@ def __getattr__(self, name):
97
104
except AttributeError :
98
105
return getattr (self .__binary , name )
99
106
107
+ def __iter__ (self ):
108
+ for _ in self .__sections_data ():
109
+ yield _
110
+
100
111
def __str__ (self ):
101
112
return self .path
102
113
103
114
def __get_ep_and_section (self ):
115
+ b = self .__binary
104
116
try :
105
117
if self .type in ["ELF" , "MachO" ]:
106
- self .__ep = self . __binary . virtual_address_to_offset (self . __binary .entrypoint )
107
- self .__ep_section = self . __binary .section_from_offset (self .__ep )
118
+ self .__ep = b . virtual_address_to_offset (b .entrypoint )
119
+ self .__ep_section = b .section_from_offset (self .__ep )
108
120
elif self .type == "PE" :
109
- self .__ep = self . __binary . rva_to_offset (self . __binary .optional_header .addressof_entrypoint )
110
- self .__ep_section = self . __binary . section_from_rva (self . __binary .optional_header .addressof_entrypoint )
121
+ self .__ep = b . rva_to_offset (b .optional_header .addressof_entrypoint )
122
+ self .__ep_section = b . section_from_rva (b .optional_header .addressof_entrypoint )
111
123
except (AttributeError , TypeError ):
112
124
self .__ep , self .__ep_section = None , None
113
125
126
def __sections_data(self):
    """Generate the layout of the binary, section by section.

    Yields 5-tuples (index, legend text, start offset, end offset, color), in this order: the headers, every section
     sorted by file offset, the section header table (ELF only), the overlay and finally a "TOTAL" entry whose
     offsets are None.

    :raises ValueError: if the type of binary is neither PE, ELF nor MachO
    """
    b = self.__binary
    # create a first section for the headers
    if self.type == "PE":
        h_len = b.sizeof_headers
    elif self.type == "ELF":
        h_len = b.header.header_size + b.header.program_header_size * b.header.numberof_segments
    elif self.type == "MachO":
        # NOTE(review): 28 and 32 look like the sizes of the 32-bit and 64-bit Mach-O headers -- to be confirmed
        h_len = (32 if str(b.header.magic).endswith("_64") else 28) + b.header.sizeof_cmds
    else:
        # fail explicitly instead of hitting an unbound "h_len" on the next line
        raise ValueError(f"Unsupported binary type: {self.type}")
    yield 0, f"[0] Header ({_human_readable_size(h_len)})", 0, h_len, "black"
    # then handle binary's sections ; "end" starts at the end of the headers so that it is defined for the ELF
    #  section header table below even when no section gets yielded
    color_cursor, i, end = 0, 1, h_len
    fallback = COLORS[None]
    for section in sorted(b.sections, key=lambda s: s.offset):
        if section.name == "" and section.size == 0 and len(section.content) == 0:
            continue
        name = self.section_names[section.name]
        try:
            c = COLORS[name.lower().lstrip("._").strip("\x00\n")]
        except KeyError:
            # unknown section name: cycle through the fallback colors
            c = fallback[color_cursor % len(fallback)]
            color_cursor += 1
        start, end = section.offset, section.offset + section.size
        yield i, f"[{i}] {name} ({_human_readable_size(end - start)})", start, end, c
        i += 1
    # sections header at the end for ELF files (placed right after the last yielded section)
    if self.type == "ELF":
        start, end = end, end + b.header.section_header_size * b.header.numberof_sections
        yield i, f"[{i}] Section Header ({_human_readable_size(end - start)})", start, end, "black"
        i += 1
    # finally, handle the overlay
    start, end = self.size - b.overlay.nbytes, self.size
    yield i, f"[{i}] Overlay ({_human_readable_size(end - start)})", start, self.size, "lightgray"
    i += 1
    yield i, f"TOTAL: {_human_readable_size(self.size)}", None, None, "white"
160
+
161
def __segments_data(self):
    """Generate the layout of the binary, segment by segment.

    Yields 5-tuples (index, legend text, start offset, end offset, color) for ELF and MachO binaries, sorted by file
     offset then by size ; nothing is yielded for the other types (segments only apply to ELF and MachO).
    """
    b = self.__binary
    if self.type == "ELF":
        ordered = sorted(b.segments, key=lambda seg: (seg.file_offset, seg.physical_size))
        for idx, seg in enumerate(ordered):
            # the text representation of the segment type is expected to hold a dot ; keep what follows the first one
            label = f"[{idx}] {str(seg.type).split('.')[1]} ({_human_readable_size(seg.physical_size)})"
            yield idx, label, seg.file_offset, seg.file_offset + seg.physical_size, "lightgray"
    elif self.type == "MachO":
        ordered = sorted(b.segments, key=lambda seg: (seg.file_offset, seg.file_size))
        for idx, seg in enumerate(ordered):
            label = f"[{idx}] {seg.name} ({_human_readable_size(seg.file_size)})"
            yield idx, label, seg.file_offset, seg.file_offset + seg.file_size, "lightgray"
173
+
174
+ def _data (self , segments = False , overlap = False ):
175
+ data = [self .__sections_data , self .__segments_data ][segments ]
176
+ # generator for getting next items, taking None value into account for the start offset
177
+ def _nexts (n ):
178
+ for j , t , s , e , c in data ():
179
+ if j <= n or s is None :
180
+ continue
181
+ yield j , t , s , e , c
182
+ # collect data, including x positions, [w]idths, [t]exts and [c]olors
183
+ x , w , t , c , cursors , legend , layer = {0 : []}, {0 : []}, {0 : []}, {0 : []}, {0 : 0 }, {'colors' : [], 'texts' : []}, 0
184
+ for i , text , start , end , color in data ():
185
+ legend ['colors' ].append (color ), legend ['texts' ].append (text )
186
+ if start is None or end is None :
187
+ continue
188
+ end = min (self .size , end )
189
+ width = end - start
190
+ if overlap :
191
+ # set the layer first
192
+ for n in range (layer + 1 ):
193
+ if start >= cursors [n ]:
194
+ layer = n
195
+ break
196
+ if start < cursors [layer ]:
197
+ layer += 1
198
+ # create layer data if layer does not exist yet
199
+ if layer not in x :
200
+ x [layer ], w [layer ], t [layer ], c [layer ], cursors [layer ] = [], [], [], [], 0
201
+ # if not starting at layer's cursor, fill up to start index with a blank section
202
+ if start > cursors [layer ]:
203
+ x [layer ].append (cursors [layer ]), w [layer ].append (start - cursors [layer ])
204
+ t [layer ].append ("_" ), c [layer ].append ("white" )
205
+ # then add the current section
206
+ cursors [layer ] = end
207
+ x [layer ].append (start ), w [layer ].append (width ), t [layer ].append (text ), c [layer ].append (color )
208
+ else :
209
+ # adjust "end" if section overlap
210
+ for j , _ , start2 , _ , _ in _nexts (i ):
211
+ end = min (start2 , end )
212
+ width = end - start
213
+ break
214
+ x [0 ].append (start ), w [0 ].append (width ), t [0 ].append (text ), c [0 ].append (color )
215
+ # add a blank if the next section does not start from the end
216
+ for j , _ , start2 , _ , _ in _nexts (i ):
217
+ if j <= i or start2 is None :
218
+ continue
219
+ if start2 > end :
220
+ x [0 ].append (end ), w [0 ].append (start2 - end ), t [0 ].append ("_" ), c [0 ].append ("white" )
221
+ break
222
+ for i in range (len (x )):
223
+ if len (x [i ]) > 0 :
224
+ end = x [i ][- 1 ] + w [i ][- 1 ]
225
+ if end < self .size :
226
+ x [i ].append (end ), w [i ].append (self .size - end ), t [i ].append ("_" ), c [i ].append ("white" )
227
+ if sum (w [i ]) != self .size :
228
+ for start , width , section , color in zip (x [i ], w [i ], t [i ], c [i ]):
229
+ print (f"LAYER { i } " , section , color , start , width )
230
+ raise ValueError (f"Sizes do not match at layer { i } ({ sum (w [i ])} != { self .size } )" )
231
+ yield i , x [i ], w [i ], t [i ], c [i ], legend
232
+
114
233
@cached_property
115
234
def entrypoint (self ):
116
235
self .__get_ep_and_section ()
@@ -121,6 +240,13 @@ def entrypoint_section(self):
121
240
self .__get_ep_and_section ()
122
241
return self .__ep_section
123
242
243
@cached_property
def hash(self):
    """SHA-256 digest of the raw bytes of the binary, as a hexadecimal string (computed once, then cached)."""
    from hashlib import sha256
    return sha256(self.rawbytes).hexdigest()
249
+
124
250
@property
125
251
def rawbytes (self ):
126
252
with open (self .path , "rb" ) as f :
@@ -129,8 +255,7 @@ def rawbytes(self):
129
255
130
256
@cached_property
131
257
def section_names (self ):
132
- __sn = lambda s : _ensure_str (s ).strip ("\x00 " ) or _ensure_str (s ) or "<empty>"
133
- names = {s .name : __sn (s .name ) for s in self .__binary .sections }
258
+ names = {s .name : _ensure_str (s .name ).strip ("\x00 " ) or "<empty>" for s in self .__binary .sections }
134
259
# names from string table only applies to PE
135
260
if self .type != "PE" :
136
261
return names
@@ -139,10 +264,11 @@ def section_names(self):
139
264
if all (match (r"/\d+$" , n ) is None for n in names .keys ()):
140
265
return names
141
266
real_names = {}
267
+ str_table_offset = self .__binary .header .pointerto_symbol_table + self .__binary .header .numberof_symbols * 18
142
268
with open (self .path , "rb" ) as f :
143
269
for n in names :
144
270
if match (r"/\d+$" , n ):
145
- f .seek (string_table_offset + int (name [1 :]))
271
+ f .seek (str_table_offset + int (n [1 :]))
146
272
n2 = b"" .join (iter (lambda : f .read (1 ), b'\x00 ' )).decode ("utf-8" , errors = "ignore" )
147
273
else :
148
274
n2 = n
0 commit comments