-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathgraph_stats.py
153 lines (139 loc) · 6.55 KB
/
graph_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import json
import sys
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MultipleLocator
#matplotlib.use('tkagg')
def median(lst):
    """Return the median of ``lst``, or ``None`` when ``lst`` is empty.

    The input is not modified; a sorted copy is used.  For an odd number of
    items the middle item itself is returned (keeping its type).  For an
    even number of items the two middle items are averaged and a float is
    returned.

    Only the branch that applies is evaluated, so an odd-length list of
    orderable but non-numeric items (e.g. strings) also works.
    """
    n = len(lst)
    if not n:
        return None
    s = sorted(lst)
    mid = n // 2
    if n % 2:
        return s[mid]
    return (s[mid - 1] + s[mid]) / 2.0
def graph_blocks_vs_time(dataset):
    """Scatter-plot one point series per entry of ``dataset`` and show it.

    For every row of every value in ``dataset`` the x coordinate is
    ``row[2] + row[3] + row[4] + row[5]`` and the y coordinate is ``row[1]``.

    NOTE(review): this function treats row[1] as a time and row[2..5] as
    block counts, while get_aggregate_data reads row[1] as a block count and
    row[2..6] as times -- confirm which row layout is current before using
    this plot.
    """
    for rows in dataset.values():
        xs = []
        ys = []
        for row in rows:
            xs.append(row[2] + row[3] + row[4] + row[5])
            ys.append(row[1])
        plt.plot(xs, ys, 'o')
    plt.show()
def graph_funcs_vs_time(dataset):
    """Scatter-plot, per dataset entry, its row count against its row[1] total.

    Each value of ``dataset`` contributes one point: x is the number of rows
    it holds and y is the sum of ``row[1]`` over those rows.  The figure is
    displayed with ``plt.show()``.

    NOTE(review): get_aggregate_data reads row[1] as a block count rather
    than a time -- confirm the row layout this plot expects.
    """
    groups = list(dataset.values())
    counts = [len(rows) for rows in groups]
    totals = [sum(row[1] for row in rows) for rows in groups]
    plt.plot(counts, totals, 'o')
    plt.show()
def get_data(filenames):
    """Load every JSON file named in ``filenames``.

    Args:
        filenames: iterable of paths to JSON files.

    Returns:
        dict mapping each path, exactly as given, to its decoded JSON
        content.  A path listed more than once is read again and keeps a
        single entry.

    Raises:
        OSError: if a file cannot be opened.
        json.JSONDecodeError: if a file does not contain valid JSON.
    """
    dataset = {}
    for filename in filenames:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(filename, encoding="utf-8") as f:
            dataset[filename] = json.load(f)
    return dataset
def _function_time(row):
    """Return the total time of one row: the sum of row[2] through row[6]."""
    return row[2] + row[3] + row[4] + row[5] + row[6]


def _share(part, total):
    """Return ``part / total``, or 0.0 when ``total`` is zero."""
    return part / total if total else 0.0


def get_aggregate_data(dataset):
    """Summarise the per-function rows of every module in ``dataset``.

    Rows are indexed positionally.  ``row[1]`` is read as the function's
    block count; ``row[2]`` .. ``row[6]`` are read as the times of the CFG,
    stack, heap, call and locals phases, and their sum is the function's
    total time.

    Args:
        dataset: dict mapping a file path to the list of rows loaded from it.

    Returns:
        A list with one 15-tuple per non-empty module, in this order:
        (name, average time, median time, max time, min time, number of
        functions, total time, share of total time taken by the slowest 1%
        of functions, CFG share, stack share, heap share, call share,
        locals share, median block count, median block count of the
        slowest 1%).  ``name`` is the file's base name up to its first dot.

    Modules without any rows are skipped with a warning on stderr instead
    of failing with a division by zero.  The "slowest 1%" group always
    holds at least one function, so its median is never ``None`` for
    modules with fewer than 100 functions.  All shares are 0.0 when the
    module's total time is zero.
    """
    aggregate_data = []
    for path, data in dataset.items():
        name = path.split('/')[-1].split(".")[0]
        if not data:
            print(f"warning: {path} contains no functions; skipped",
                  file=sys.stderr)
            continue

        times = [_function_time(row) for row in data]
        num_funcs = len(times)
        total_t = sum(times)
        average_t = total_t / num_funcs
        median_t = median(times)
        max_t = max(times)
        min_t = min(times)

        # At least one function, so small modules still yield a real median.
        N = max(1, num_funcs // 100)
        print("top 1% = ", N, " functions out of", num_funcs)
        top_n = sorted(data, key=_function_time, reverse=True)[:N]
        top_percent = _share(sum(_function_time(row) for row in top_n), total_t)
        top_percent_medians = median([row[1] for row in top_n])

        cfg_percent = _share(sum(row[2] for row in data), total_t)
        stack_percent = _share(sum(row[3] for row in data), total_t)
        heap_percent = _share(sum(row[4] for row in data), total_t)
        call_percent = _share(sum(row[5] for row in data), total_t)
        locals_percent = _share(sum(row[6] for row in data), total_t)
        print(top_n, top_percent)

        median_blocks = median([row[1] for row in data])
        aggregate_data.append((
            name, average_t, median_t, max_t, min_t, num_funcs, total_t,
            top_percent, cfg_percent, stack_percent, heap_percent,
            call_percent, locals_percent, median_blocks, top_percent_medians,
        ))
    return aggregate_data
def generate_summary_table(aggregate_data):
    """Render the per-module statistics as rows of a LaTeX table.

    Args:
        aggregate_data: sequence of tuples whose fields 0..6 are the module
            name, average, median, max and min function time, function
            count and total time.

    Returns:
        A string of seven newline-terminated rows.  The first row lists the
        module names; each following row starts with a label and lists the
        corresponding field of every module rounded to two decimals.  Cells
        are separated by `` & `` and every row ends with ``\\\\``.
    """
    def cells(index, render):
        return " & ".join([render(d[index]) for d in aggregate_data]) + "\\\\"

    def rounded(value):
        return str(round(value, 2))

    labelled_fields = (
        ("Average Function Validation Time (s) & ", 1),
        ("Median Function Validation Time (s) & ", 2),
        ("Max Function Validation Time (s) & ", 3),
        ("Min Function Validation Time (s) & ", 4),
        ("\\# Functions in Module & ", 5),
        ("Total Validation Time (s) & ", 6),
    )

    lines = [" &" + cells(0, str)]
    for label, index in labelled_fields:
        lines.append(label + cells(index, rounded))
    return "\n".join(lines) + "\n"
# Print out some quick statistics.
def summarise_data(aggregate_data):
    """Print cross-module statistics and save a histogram of module times.

    Args:
        aggregate_data: list of 15-tuples as built by get_aggregate_data.
            Fields used: 2 (median function time), 3 (max function time),
            5 (function count), 6 (total time), 7 (top-1% share),
            8..12 (CFG, stack, heap, call, locals shares), 13 (median block
            count) and 14 (median block count of the top 1%).

    Side effects:
        Prints the statistics to stdout and writes the histogram of module
        total times (5-second-wide bins) to ``performance.pdf`` in the
        current directory.

    With no modules only the module count is printed and no figure is
    written, since every other statistic would divide by zero.
    """
    def mean_percent(shares):
        return sum(shares) / len(shares) * 100

    medians = [round(d[2], 2) for d in aggregate_data]
    maxes = [round(d[3], 2) for d in aggregate_data]
    num_funcs = [round(d[5], 2) for d in aggregate_data]
    times = [round(d[6], 2) for d in aggregate_data]
    one_percent = [d[7] for d in aggregate_data]
    cfg_percent = [d[8] for d in aggregate_data]
    stack_percent = [d[9] for d in aggregate_data]
    heap_percent = [d[10] for d in aggregate_data]
    call_percent = [d[11] for d in aggregate_data]
    locals_percent = [d[12] for d in aggregate_data]
    median_blocks = [d[13] for d in aggregate_data]
    # A module may carry None here when its top-1% group was empty; None
    # cannot be ordered against numbers, so leave such modules out.
    top_percent_median_blocks = [d[14] for d in aggregate_data
                                 if d[14] is not None]

    print(f"Number of binaries = {len(times)}")
    if not aggregate_data:
        return

    print(f"Median function validation time: {median(medians)}")
    num_above_minute = len([t for t in maxes if t > 60.0])
    print(f"Number of binaries with a function that took > 1 minute to validate: {num_above_minute}")
    print(f"Top 1% of functions account for (on average) {mean_percent(one_percent)}% of total execution time")
    print(f"{mean_percent(cfg_percent)}% of verification time spent making CFGs")
    print(f"{mean_percent(stack_percent)}% of verification time spent checking stack")
    print(f"{mean_percent(heap_percent)}% of verification time spent checking heap")
    print(f"{mean_percent(call_percent)}% of verification time spent checking calls")
    print(f"{mean_percent(locals_percent)}% of verification time spent checking locals")
    print(f"Average Time = {sum(times) / len(times)}")
    print(f"Min Validation Time: {min(times)}")
    print(f"Max Validation Time: {max(times)}")
    print(f"Median Validation Time = {median(times)}")
    print(f"Min Functions: {min(num_funcs)}")
    print(f"Max Functions: {max(num_funcs)}")
    print(f"Median Functions: {median(num_funcs)}")
    print(f"Median of Median of blocks in modules: {median(median_blocks)}")
    print(f"Median of Median of blocks in top 1% of functions in modules: {median(top_percent_median_blocks)}")

    _, ax = plt.subplots()
    ax.xaxis.set_minor_locator(MultipleLocator(5))
    plt.xlabel('Module Validation Time (s)')
    plt.ylabel('# of Modules')
    # One bin per 5 seconds; keep at least one bin so a single module (or
    # modules with identical times) does not request zero bins.
    bin_count = max(1, math.ceil((max(times) - min(times)) / 5))
    plt.hist(times, bins=bin_count)
    print("Histogram Created")
    plt.savefig("performance.pdf")
    print("Histogram Saved")
def run(filenames):
    """Load the given JSON result files and report on them.

    The files are read with get_data, reduced with get_aggregate_data,
    summarised on stdout (and as a histogram) by summarise_data, and finally
    the table from generate_summary_table is printed.  The scatter plots
    (graph_blocks_vs_time, graph_funcs_vs_time) are not produced here.
    """
    per_file_rows = get_data(filenames)
    per_module_stats = get_aggregate_data(per_file_rows)
    summarise_data(per_module_stats)
    print(generate_summary_table(per_module_stats))
def main():
    """Command-line entry point: every argument is a JSON result file.

    Prints a usage line on stderr and exits with status 2 when no file is
    given; otherwise echoes ``sys.argv`` and hands the file list to ``run``.
    """
    filenames = sys.argv[1:]
    if not filenames:
        print(f"usage: {sys.argv[0]} RESULTS.json [RESULTS.json ...]",
              file=sys.stderr)
        sys.exit(2)
    print(sys.argv)
    run(filenames)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()