Skip to content

Commit 219b75c

Browse files
committed
Improved plots
1 parent 292f8ea commit 219b75c

File tree

12 files changed

+456
-146
lines changed

12 files changed

+456
-146
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,4 @@ TODO
7676
script.py
7777
tool.py
7878
/*.png
79-
*.exe
79+
/*.exe

src/exeplot/VERSION.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.1.0
1+
0.2.0

src/exeplot/__conf__.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
'font_family': "serif",
1414
'font_size': 10,
1515
'img_format': "png",
16+
'shadow': True,
1617
'style': "default",
17-
# 'transparent': False,
18+
'transparent': False,
1819
}
1920

2021

21-
def configure():
22+
def configure(): # pragma: no cover
2223
from configparser import ConfigParser
2324
from os.path import exists, expanduser
2425
path = expanduser("~/.exeplot.conf")
@@ -43,6 +44,9 @@ def configure_fonts(**kw):
4344
kw['suptitle-font'] = {'fontfamily': kw.pop('suptitle_font_family', config['font_family']),
4445
'fontsize': kw.pop('suptitle_font_size', int(config['font_size'] * 1.2)),
4546
'fontweight': kw.pop('suptitle_font_weight', "normal")}
47+
kw['annotation-font'] = {'fontfamily': kw.pop('suptitle_font_family', config['font_family']),
48+
'fontsize': kw.pop('suptitle_font_size', int(config['font_size'] * .5)),
49+
'fontweight': kw.pop('suptitle_font_weight', "normal")}
4650
for p in "xy":
4751
kw[f'{p}label-font'] = {'fontfamily': kw.pop(f'{p}label_font_family', config['font_family']),
4852
'fontsize': kw.pop(f'{p}label_font_size', config['font_size']),
@@ -56,28 +60,32 @@ def save_figure(f):
5660
function ; put it in the "figures" subfolder of the current experiment's folder if relevant. """
5761
@wraps(f)
5862
def _wrapper(*a, **kw):
63+
import matplotlib.pyplot as plt
5964
from os import makedirs
6065
from os.path import basename, dirname, splitext
61-
logger.info("Preparing plot data...")
66+
from .plots.__common__ import Binary
67+
plot_type = f.__globals__['__name__'].split(".")[-1]
68+
logger.info(f"Preparing {plot_type} plot data...")
6269
configure()
63-
imgs = f(*a, **configure_fonts(**kw))
64-
ext = "." + kw.get('img_format', config['img_format'])
65-
kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi"]}
70+
kw = configure_fonts(**kw)
71+
imgs = f(*a, **kw)
72+
r = []
73+
kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi", "transparent"]}
6674
for img in (imgs if isinstance(imgs, (list, tuple, type(x for x in []))) else [imgs]):
67-
if img is None:
68-
img = kw.get('img_name') or splitext(basename(a[0]))[0]
69-
if not img.endswith(ext):
75+
img = img or kw.get('img_name') or f"{splitext(basename(a[0]))[0]}_{plot_type}"
76+
if not img.endswith(ext := "." + kw.get('img_format', config['img_format'])):
7077
img += ext
71-
if d := dirname(img):
72-
makedirs(d, exist_ok=True)
73-
if kw.get('interactive_mode', False):
78+
makedirs(dirname(img) or ".", exist_ok=True)
79+
if kw.get('interactive_mode', False): # pragma: no cover
7480
from code import interact
81+
logger.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
7582
ns = {k: v for k, v in globals().items()}
7683
ns.update(locals())
77-
l.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
7884
interact(local=ns)
7985
logger.info(f"Saving to {img}...")
8086
plt.savefig(img, **kw_plot)
8187
logger.debug(f"> saved to {img}...")
88+
r.append(img)
89+
return r
8290
return _wrapper
8391

src/exeplot/__main__.py

+7-13
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def _parser(name, description, examples):
1313
epilog="usage examples:\n " + "\n ".join(examples) if len(examples) > 0 else None)
1414

1515

16-
def _setup(parser):
16+
def _setup(parser): # pragma: no cover
1717
args = parser.parse_args()
1818
if hasattr(args, "verbose"):
1919
import logging
@@ -31,18 +31,12 @@ def main():
3131
plots = parser.add_subparsers(dest="type", help="plot type")
3232
for plot in _plots:
3333
plot_func = globals()[plot]
34-
plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip()))
35-
"""
36-
opt = plot_parser.add_argument_group("style arguments")
37-
for a in ["title-font", "suptitle-font", "xlabel-font", "ylable-font"]:
38-
for i in ["family", "size", "weight"]:
39-
kw = {'help': "", 'metavar': i.upper()}
40-
if i == "size":
41-
kw['type'] = int
42-
elif i == "weight":
43-
kw['choices'] = ("normal", "bold", "italic")
44-
opt.add_argument(f"--{a}-{i}", **kw)
45-
"""
34+
plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip(), add_help=False))
35+
opt = plot_parser.add_argument_group("options")
36+
opt.add_argument("--no-title", action="store_true", help="do not display the title (default: False)")
37+
extra = plot_parser.add_argument_group("extra arguments")
38+
extra.add_argument("-h", "--help", action="help", help="show this help message and exit")
39+
extra.add_argument("-i", "--interactive-mode", action="store_true", help="open Python console to edit the plot")
4640
args = _setup(parser)
4741
exe = args.executable if isinstance(args.executable, list) else [args.executable]
4842
delattr(args, "executable")

src/exeplot/plots/__common__.py

+139-13
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
from statistics import mean
55

66

7+
CACHE_DIR = os.path.expanduser("~/.exeplot")
78
# https://matplotlib.org/2.0.2/examples/color/named_colors.html
89
COLORS = {
910
None: ["salmon", "gold", "plum", "darkkhaki", "orchid", "sandybrown", "purple", "khaki", "peru", "thistle"],
11+
'header': "black",
1012
'headers': "black",
1113
'overlay': "lightgray",
14+
'section header': "black",
15+
'section headers': "black",
1216
'<undef>': "lightgray",
1317
# common
1418
'text': "darkseagreen", # code
@@ -41,14 +45,17 @@
4145
MIN_ZONE_WIDTH = 3 # minimum number of samples on the entropy plot for a section (so that it can still be visible even
4246
# if it is far smaller than the other sections)
4347
N_SAMPLES = 2048
48+
SHADOW = {'shade': .3, 'ox': .005, 'oy': -.005, 'linewidth': 0.}
4449
SUBLABELS = {
45-
'ep': lambda d: "EP at 0x%.8x in %s" % d['entrypoint'][1:],
50+
'ep': lambda d: "EP at 0x%.8x in %s" % d['ep'][1:],
4651
'size': lambda d: "Size = %s" % _human_readable_size(d['size'], 1),
4752
'size-ep': lambda d: "Size = %s\nEP at 0x%.8x in %s" % \
48-
(_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2]),
53+
(_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2]),
54+
'size-ent': lambda d: "Size = %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
55+
(_human_readable_size(d['size'], 1), mean(d['entropy']) * 8, d['entropy*']),
4956
'size-ep-ent': lambda d: "Size = %s\nEP at 0x%.8x in %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
50-
(_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2],
51-
mean(d['entropy']) * 8, d['entropy*']),
57+
(_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2], mean(d['entropy']) * 8,
58+
d['entropy*']),
5259
}
5360

5461

@@ -65,7 +72,7 @@ def _ensure_str(s, encoding='utf-8', errors='strict'):
6572

6673
def _human_readable_size(size, precision=0):
6774
i, units = 0, ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
68-
while size >= 1024 and i < len(units):
75+
while size >= 1024 and i < len(units)-1:
6976
i += 1
7077
size /= 1024.0
7178
return "%.*f%s" % (precision, size, units[i])
@@ -74,7 +81,7 @@ def _human_readable_size(size, precision=0):
7481
class Binary:
7582
def __init__(self, path, **kwargs):
7683
from lief import logging, parse
77-
self.path = str(path)
84+
self.path = os.path.abspath(str(path))
7885
self.basename = os.path.basename(self.path)
7986
self.stem = os.path.splitext(os.path.basename(self.path))[0]
8087
l = kwargs.get('logger')
@@ -97,20 +104,132 @@ def __getattr__(self, name):
97104
except AttributeError:
98105
return getattr(self.__binary, name)
99106

107+
def __iter__(self):
108+
for _ in self.__sections_data():
109+
yield _
110+
100111
def __str__(self):
101112
return self.path
102113

103114
def __get_ep_and_section(self):
115+
b = self.__binary
104116
try:
105117
if self.type in ["ELF", "MachO"]:
106-
self.__ep = self.__binary.virtual_address_to_offset(self.__binary.entrypoint)
107-
self.__ep_section = self.__binary.section_from_offset(self.__ep)
118+
self.__ep = b.virtual_address_to_offset(b.entrypoint)
119+
self.__ep_section = b.section_from_offset(self.__ep)
108120
elif self.type == "PE":
109-
self.__ep = self.__binary.rva_to_offset(self.__binary.optional_header.addressof_entrypoint)
110-
self.__ep_section = self.__binary.section_from_rva(self.__binary.optional_header.addressof_entrypoint)
121+
self.__ep = b.rva_to_offset(b.optional_header.addressof_entrypoint)
122+
self.__ep_section = b.section_from_rva(b.optional_header.addressof_entrypoint)
111123
except (AttributeError, TypeError):
112124
self.__ep, self.__ep_section = None, None
113125

126+
def __sections_data(self):
127+
b = self.__binary
128+
# create a first section for the headers
129+
if self.type == "PE":
130+
h_len = b.sizeof_headers
131+
elif self.type == "ELF":
132+
h_len = b.header.header_size + b.header.program_header_size * b.header.numberof_segments
133+
elif self.type == "MachO":
134+
h_len = [28, 32][str(b.header.magic)[-3:] == "_64"] + b.header.sizeof_cmds
135+
yield 0, f"[0] Header ({_human_readable_size(h_len)})", 0, h_len, "black"
136+
# then handle binary's sections
137+
color_cursor, i = 0, 1
138+
for section in sorted(b.sections, key=lambda s: s.offset):
139+
if section.name == "" and section.size == 0 and len(section.content) == 0:
140+
continue
141+
try:
142+
c = COLORS[self.section_names[section.name].lower().lstrip("._").strip("\x00\n ")]
143+
except KeyError:
144+
co = COLORS[None]
145+
c = co[color_cursor % len(co)]
146+
color_cursor += 1
147+
start, end = section.offset, section.offset + section.size
148+
yield i, f"[{i}] {self.section_names[section.name]} ({_human_readable_size(end - start)})", start, end, c
149+
i += 1
150+
# sections header at the end for ELF files
151+
if self.type == "ELF":
152+
start, end = end, end + b.header.section_header_size * b.header.numberof_sections
153+
yield i, f"[{i}] Section Header ({_human_readable_size(end - start)})", start, end, "black"
154+
i += 1
155+
# finally, handle the overlay
156+
start, end = self.size - b.overlay.nbytes, self.size
157+
yield i, f"[{i}] Overlay ({_human_readable_size(end - start)})", start, self.size, "lightgray"
158+
i += 1
159+
yield i, f"TOTAL: {_human_readable_size(self.size)}", None, None, "white"
160+
161+
def __segments_data(self):
162+
b = self.__binary
163+
if self.type == "PE":
164+
return # segments only apply to ELF and MachO
165+
elif self.type == "ELF":
166+
for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.physical_size))):
167+
yield i, f"[{i}] {str(s.type).split('.')[1]} ({_human_readable_size(s.physical_size)})", \
168+
s.file_offset, s.file_offset+s.physical_size, "lightgray"
169+
elif self.type == "MachO":
170+
for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.file_size))):
171+
yield i, f"[{i}] {s.name} ({_human_readable_size(s.file_size)})", \
172+
s.file_offset, s.file_offset+s.file_size, "lightgray"
173+
174+
def _data(self, segments=False, overlap=False):
175+
data = [self.__sections_data, self.__segments_data][segments]
176+
# generator for getting next items, taking None value into account for the start offset
177+
def _nexts(n):
178+
for j, t, s, e, c in data():
179+
if j <= n or s is None:
180+
continue
181+
yield j, t, s, e, c
182+
# collect data, including x positions, [w]idths, [t]exts and [c]olors
183+
x, w, t, c, cursors, legend, layer = {0: []}, {0: []}, {0: []}, {0: []}, {0: 0}, {'colors': [], 'texts': []}, 0
184+
for i, text, start, end, color in data():
185+
legend['colors'].append(color), legend['texts'].append(text)
186+
if start is None or end is None:
187+
continue
188+
end = min(self.size, end)
189+
width = end - start
190+
if overlap:
191+
# set the layer first
192+
for n in range(layer + 1):
193+
if start >= cursors[n]:
194+
layer = n
195+
break
196+
if start < cursors[layer]:
197+
layer += 1
198+
# create layer data if layer does not exist yet
199+
if layer not in x:
200+
x[layer], w[layer], t[layer], c[layer], cursors[layer] = [], [], [], [], 0
201+
# if not starting at layer's cursor, fill up to start index with a blank section
202+
if start > cursors[layer]:
203+
x[layer].append(cursors[layer]), w[layer].append(start - cursors[layer])
204+
t[layer].append("_"), c[layer].append("white")
205+
# then add the current section
206+
cursors[layer] = end
207+
x[layer].append(start), w[layer].append(width), t[layer].append(text), c[layer].append(color)
208+
else:
209+
# adjust "end" if section overlap
210+
for j, _, start2, _, _ in _nexts(i):
211+
end = min(start2, end)
212+
width = end - start
213+
break
214+
x[0].append(start), w[0].append(width), t[0].append(text), c[0].append(color)
215+
# add a blank if the next section does not start from the end
216+
for j, _, start2, _, _ in _nexts(i):
217+
if j <= i or start2 is None:
218+
continue
219+
if start2 > end:
220+
x[0].append(end), w[0].append(start2 - end), t[0].append("_"), c[0].append("white")
221+
break
222+
for i in range(len(x)):
223+
if len(x[i]) > 0:
224+
end = x[i][-1] + w[i][-1]
225+
if end < self.size:
226+
x[i].append(end), w[i].append(self.size-end), t[i].append("_"), c[i].append("white")
227+
if sum(w[i]) != self.size:
228+
for start, width, section, color in zip(x[i], w[i], t[i], c[i]):
229+
print(f"LAYER {i}", section, color, start, width)
230+
raise ValueError(f"Sizes do not match at layer {i} ({sum(w[i])} != {self.size})")
231+
yield i, x[i], w[i], t[i], c[i], legend
232+
114233
@cached_property
115234
def entrypoint(self):
116235
self.__get_ep_and_section()
@@ -121,6 +240,13 @@ def entrypoint_section(self):
121240
self.__get_ep_and_section()
122241
return self.__ep_section
123242

243+
@cached_property
244+
def hash(self):
245+
from hashlib import sha256
246+
m = sha256()
247+
m.update(self.rawbytes)
248+
return m.hexdigest()
249+
124250
@property
125251
def rawbytes(self):
126252
with open(self.path, "rb") as f:
@@ -129,8 +255,7 @@ def rawbytes(self):
129255

130256
@cached_property
131257
def section_names(self):
132-
__sn = lambda s: _ensure_str(s).strip("\x00") or _ensure_str(s) or "<empty>"
133-
names = {s.name: __sn(s.name) for s in self.__binary.sections}
258+
names = {s.name: _ensure_str(s.name).strip("\x00") or "<empty>" for s in self.__binary.sections}
134259
# names from string table only applies to PE
135260
if self.type != "PE":
136261
return names
@@ -139,10 +264,11 @@ def section_names(self):
139264
if all(match(r"/\d+$", n) is None for n in names.keys()):
140265
return names
141266
real_names = {}
267+
str_table_offset = self.__binary.header.pointerto_symbol_table + self.__binary.header.numberof_symbols * 18
142268
with open(self.path, "rb") as f:
143269
for n in names:
144270
if match(r"/\d+$", n):
145-
f.seek(string_table_offset + int(name[1:]))
271+
f.seek(str_table_offset + int(n[1:]))
146272
n2 = b"".join(iter(lambda: f.read(1), b'\x00')).decode("utf-8", errors="ignore")
147273
else:
148274
n2 = n

src/exeplot/plots/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
name = f[:-3]
1313
module = importlib.import_module(f".{name}", package=__name__)
1414
if hasattr(module, "plot") and callable(getattr(module, "plot")):
15-
globals()[f"{name}"] = getattr(module, "plot")
15+
globals()[f"{name}"] = f = getattr(module, "plot")
16+
f.__args__ = getattr(module, "arguments")
1617
__all__.append(name)
1718

0 commit comments

Comments
 (0)