-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacadnn.py
258 lines (200 loc) · 7.89 KB
/
acadnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# Top Script that launches the ACADnn algorithm. Launches sequential simulations, can be converted to parallel sims if needed.
# File Name: ACADnn.py
# Date: Dec 18th, 2021
# Authors: Selvaraj Anandaraj
import os
import re
# ---------------------------------------------------------------------------
# Sweep configuration
# ---------------------------------------------------------------------------

# How many candidate values are tried for each swept parameter.
# The candidate tables below are written for a stride range of exactly 2.
stride_range = 2

# Constraint switches: True picks the "tight" candidate table, False the
# "loose" one.
#   associativity -> L1D associativity candidates
#                    (tightness of routing congestion near the memory)
#   area          -> L1D tag latency and SIMD float-multiply op latency
#                    candidates (transistor size / area tightness)
#   memory_size   -> L1D and L2 size candidates
associativity = True
area = True
memory_size = True

# When True the final ranking is built from the power results, otherwise
# from the performance results.
Power = True

# Candidate tables: two settings per parameter because stride_range is 2.
tight_l1d_size = ["64kB", "128kB"]
loose_l1d_size = ["128kB", "256kB"]

tight_l1d_assoc = ["2", "4"]
loose_l1d_assoc = ["1", "2"]

tight_l1d_taglat = ["2", "4"]
loose_l1d_taglat = ["1", "2"]

tight_l2_size = ["256kB", "512kB"]
loose_l2_size = ["512kB", "1024kB"]

tight_mul_oplat = ["2", "4"]
loose_mul_oplat = ["1", "2"]

# Resolve the active candidate list for every parameter from its switch.
if memory_size:
    l1d_size, l2_size = tight_l1d_size, tight_l2_size
else:
    l1d_size, l2_size = loose_l1d_size, loose_l2_size

if associativity:
    l1d_assoc = tight_l1d_assoc
else:
    l1d_assoc = loose_l1d_assoc

if area:
    l1d_taglat, mul_oplat = tight_l1d_taglat, tight_mul_oplat
else:
    l1d_taglat, mul_oplat = loose_l1d_taglat, loose_mul_oplat
def _parse_gem5_stats(stats_path):
    """Read simulated ticks and summed average power from a gem5 stats file.

    Each statistics line is split on whitespace into ``name value ...``.
    Only the stat name is matched, so text in the trailing description
    cannot trigger a false hit, and the value is taken as a whole field so
    numbers such as ``5.25`` are read correctly.

    Args:
        stats_path: Path of the stats file to read.

    Returns:
        ``(sim_ticks, total_average_power)`` where ``sim_ticks`` is the
        integer ``simTicks`` value and ``total_average_power`` is the sum of
        the ``0.averagePower`` and ``1.averagePower`` values. If a stat
        appears more than once, the last occurrence wins.

    Raises:
        ValueError: If any of the three statistics is absent from the file.
    """
    sim_ticks = None
    power_0 = None
    power_1 = None

    with open(stats_path, "r") as stats_file:
        for line in stats_file:
            fields = line.split()
            if len(fields) < 2:
                continue  # blank line or a line without a value column
            stat_name, stat_value = fields[0], fields[1]
            if "simTicks" in stat_name:
                sim_ticks = int(stat_value)
            elif "0.averagePower" in stat_name:
                power_0 = float(stat_value)
            elif "1.averagePower" in stat_name:
                power_1 = float(stat_value)

    missing = [
        label
        for label, found in (
            ("simTicks", sim_ticks),
            ("0.averagePower", power_0),
            ("1.averagePower", power_1),
        )
        if found is None
    ]
    if missing:
        raise ValueError(
            "missing statistics in " + stats_path + ": " + ", ".join(missing)
        )

    return sim_ticks, power_0 + power_1


def execute(l2_present, l1d_size, l1d_assoc, l1d_taglat, l2_size, mul_oplat):
    """Run one gem5 simulation and return its performance and power.

    Args:
        l2_present: True to run ``o3_with_l2.py`` (and pass ``--l2_size``),
            False to run ``o3_without_l2.py``.
        l1d_size: Value passed as ``--l1d_size`` (e.g. ``"64kB"``).
        l1d_assoc: Value passed as ``--l1d_assoc``.
        l1d_taglat: Value passed as ``--l1d_tag_lat``.
        l2_size: Value passed as ``--l2_size``; ignored when ``l2_present``
            is false.
        mul_oplat: Value passed as ``--SimdFloatMul_OpLatency``.

    Returns:
        ``(performance, total_average_power, config_tag)``: the ``simTicks``
        value, the sum of the two ``averagePower`` values, and a string
        identifying the configuration that was simulated.

    Raises:
        OSError: If ``m5out/stats.txt`` cannot be opened.
        ValueError: If the stats file lacks one of the required statistics.
    """
    options = (
        f'--l1d_size="{l1d_size}" --l1d_assoc="{l1d_assoc}" '
        f'--l1d_tag_lat="{l1d_taglat}" --SimdFloatMul_OpLatency="{mul_oplat}"'
    )
    if l2_present:
        script = "configs/learning_gem5/part1/o3_with_l2.py"
        options += f' --l2_size="{l2_size}"'
        config_tag = f"{l1d_size}_{l1d_assoc}_{l1d_taglat}_{l2_size}_{mul_oplat}"
    else:
        script = "configs/learning_gem5/part1/o3_without_l2.py"
        config_tag = f"{l1d_size}_{l1d_assoc}_{l1d_taglat}_l2_absent_{mul_oplat}"

    # NOTE: the exit status is not checked, so if the simulator fails to run
    # a stats file left over from a previous run would be parsed instead.
    os.system(f"build/X86/gem5.opt {script} {options}")

    performance, total_average_power = _parse_gem5_stats("m5out/stats.txt")
    return performance, total_average_power, config_tag
# Irrespective of memory size we can knock off L2 based on need. The probe
# runs use the mean parameters for assoc, tag lat and mul oplat.
#
# `l1d_size` already holds the tight or loose candidate list selected by
# `memory_size`, so the with-L2 and without-L2 runs are always compared on
# the SAME L1D size (previously the loose case ran the no-L2 simulation with
# the tight sizes).
count = 0
for i in range(stride_range):
    perf_with_l2, _, __ = execute(True, l1d_size[i], 2, 2, "256kB", 1)
    perf_without_l2, _, __ = execute(False, l1d_size[i], 2, 2, "256kB", 1)
    # Performance is the simTicks count, so a value that is not lower with
    # L2 means the L2 cache bought no speed-up at this L1D size.
    if perf_with_l2 >= perf_without_l2:
        count = count + 1

# Drop L2 only if it failed to help at every probed L1D size.
if count == stride_range:
    l2_present = 0
else:
    l2_present = 1
    print("L2 Cache is present!")
# Results of the exhaustive sweep, all keyed by the configuration tag that
# execute() returns.
perf_dict = {}       # tag -> performance
power_dict = {}      # tag -> total average power
l1d_size_dict = {}   # tag -> L1D size used for that run

if l2_present != 1:
    # No L2: sweep L1D size, associativity, tag latency and multiply latency.
    for size_idx in range(stride_range):
        size = l1d_size[size_idx]
        for assoc_idx in range(stride_range):
            assoc = int(l1d_assoc[assoc_idx])
            for taglat_idx in range(stride_range):
                taglat = int(l1d_taglat[taglat_idx])
                for oplat_idx in range(stride_range):
                    oplat = int(mul_oplat[oplat_idx])
                    perf, power, config = execute(False, size, assoc, taglat, "", oplat)
                    perf_dict[config] = perf
                    power_dict[config] = power
                    l1d_size_dict[config] = size
else:
    # L2 kept: the L2 size is swept as a fifth dimension.
    for size_idx in range(stride_range):
        size = l1d_size[size_idx]
        for assoc_idx in range(stride_range):
            assoc = int(l1d_assoc[assoc_idx])
            for taglat_idx in range(stride_range):
                taglat = int(l1d_taglat[taglat_idx])
                for l2_idx in range(stride_range):
                    l2 = l2_size[l2_idx]
                    for oplat_idx in range(stride_range):
                        oplat = int(mul_oplat[oplat_idx])
                        perf, power, config = execute(True, size, assoc, taglat, l2, oplat)
                        perf_dict[config] = perf
                        power_dict[config] = power
                        l1d_size_dict[config] = size
# Shortlist the five best configurations for each metric. Both metrics are
# sorted in ascending order, so "best" means the lowest value; insertion
# order of the resulting dicts is the ranking order.
sorted_perf_dict = dict(
    sorted(perf_dict.items(), key=lambda entry: entry[1])[:5]
)
sorted_power_dict = dict(
    sorted(power_dict.items(), key=lambda entry: entry[1])[:5]
)
# Score the shortlisted configurations, rewarding the smallest L1 cache.
def _cache_size_in_kb(size):
    """Convert a size string such as "64kB" or "1MB" to a number of kB.

    Sizes must be compared numerically: as plain strings "128kB" sorts
    before "64kB", which would pick the wrong "smallest" cache.

    Raises:
        ValueError: If the string is not <digits> followed by kB, MB or GB.
    """
    multipliers = {"k": 1, "m": 1024, "g": 1024 * 1024}
    match = re.fullmatch(r"(\d+)([kmg])b", str(size).strip().lower())
    if match is None:
        raise ValueError("unrecognised cache size: %r" % (size,))
    return int(match.group(1)) * multipliers[match.group(2)]


# Power selects which shortlist is ranked: the power-sorted one when True,
# the performance-sorted one when False.
ranked_configs = sorted_power_dict if Power else sorted_perf_dict

# Smallest L1D size among the shortlisted configurations (None if empty).
min_size_kb = min(
    (_cache_size_in_kb(l1d_size_dict[key]) for key in ranked_configs),
    default=None,
)

score = 100
score_dict = {}
for key in ranked_configs:
    if _cache_size_in_kb(l1d_size_dict[key]) == min_size_kb:
        score_dict[key] = score + 7  # Benefit for a smaller area cache
    else:
        score_dict[key] = score - 9  # Higher loss for a higher area cache
    # Base score drops with every rank position.
    # NOTE(review): applied once per ranked entry - confirm this matches the
    # intended indentation of the original script.
    score = score - 5

# score_dict will have the final results, ordered from lowest to highest
# score (the best-scoring configuration is printed last).
score_dict = dict(sorted(score_dict.items(), key=lambda item: item[1]))
print(score_dict)