Skip to content

Commit e1b1f15

Browse files
author
Gheorghe-Teodor Bercea
committed
Fix iaca for lhs.
1 parent 2912f4d commit e1b1f15

File tree

2 files changed

+33
-12
lines changed

2 files changed

+33
-12
lines changed

pyop2/host.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,13 @@ def _iaca_ast_to_c(self, ast, opts={}):
8585
iaca_ast, last, nest, loop_count = coffee.utils.insert_iaca(ast, iakify)
8686
if not last:
8787
iakify += 1
88-
ast_handler = ASTKernel(iaca_ast, self._include_dirs)
89-
ast_handler.plan_cpu(opts)
90-
self._applied_blas = ast_handler.blas
91-
self._applied_ap = ast_handler.ap
92-
iaca_kernels.append([ast_handler.gencode(), nest, loop_count])
88+
# ast_handler = ASTKernel(iaca_ast, self._include_dirs)
89+
# ast_handler.plan_cpu(opts)
90+
# self._applied_blas = ast_handler.blas
91+
# self._applied_ap = ast_handler.ap
92+
# iaca_kernels.append([ast_handler.gencode(), nest, loop_count])
93+
# from IPython import embed; embed()
94+
iaca_kernels.append([iaca_ast.gencode(), nest, loop_count])
9395
return iaca_kernels
9496

9597

@@ -777,18 +779,25 @@ def sum_nested_flop_values(self, iaca_kernels, val_pos):
777779
for ind, ic in enumerate(reversed(iaca_kernels)):
778780
if ic[1] == prev_nest:
779781
i_flops += (ic[2] if ic[3] else 1) * ic[val_pos]
782+
# print "same nest: ", i_flops
780783
elif ic[1] < prev_nest:
781784
iaca_block_flops = 0
782785
for ic_prev in iaca_kernels[len(iaca_kernels) - ind:]:
783786
if prev_nest == ic_prev[1]:
784787
iaca_block_flops += ic_prev[val_pos]
785-
else:
786-
break
787-
i_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
788+
# else:
789+
# break
790+
curr_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
791+
if ic[6] == 1:
792+
i_flops = min(curr_flops, (ic[2] if ic[3] else 1) * ic[val_pos])
793+
else:
794+
i_flops = curr_flops
795+
# print "prev nest greater: ", i_flops
788796
else:
789797
# Not tested yet. Might never apply to FFC kernels.
790798
total_flops += i_flops
791799
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
800+
# print "prev nest smaller: ", i_flops
792801
prev_nest = ic[1]
793802
total_flops += i_flops
794803
return total_flops
@@ -807,32 +816,37 @@ def sum_nested_cycle_values(self, iaca_kernels, val_pos):
807816
if ic[1] == prev_nest:
808817
# Add the cycles or flops and multiply by loop counter if the loop is not unrolled
809818
i_flops += (ic[2] if ic[3] else 1) * ic[val_pos]
819+
# print "same nest: ", i_flops
810820
# if the current loop contains the previous one (has a lower nest number).
811821
elif ic[1] < prev_nest:
812822
# if there is only one jump instruction and the loop is not unrolled (i.e. inner loops unrolled but current not unrolled)
813823
if ic[6] == 1 and ic[3]:
814824
# loop counter times the number of cycles
815825
i_flops = ic[2] * ic[val_pos]
826+
# print "greater nest 1: ", i_flops
816827
else:
817828
# sum up the values for the static cycle counts for the loops contained in the current loop
818829
# regardless of their unrolled/not unrolled status
819830
iaca_block_flops = 0
820831
for ic_prev in iaca_kernels[len(iaca_kernels) - ind:]:
821832
if prev_nest == ic_prev[1]:
822833
iaca_block_flops += ic_prev[val_pos]
823-
else:
824-
break
834+
# else:
835+
# break
825836
# if there are fewer cycles to be done in the current loop, then return that count
826837
if iaca_block_flops > ic[val_pos]:
827838
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
839+
# print "greater nest 2: ", i_flops
828840
else:
829841
# i_flops contains the flop count for the inner loops, add to that the flops generated by the rest of the code
830842
# in the current loop
831843
i_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
844+
# print "greater nest 3: ", i_flops
832845
else:
833846
# Not tested yet. Might never apply to FFC kernels.
834847
total_flops += i_flops
835848
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
849+
# print "smaller nest: ", i_flops
836850
prev_nest = ic[1]
837851
total_flops += i_flops
838852
return total_flops
@@ -985,6 +999,7 @@ def compile(self, argtypes=None, restype=None):
985999
wrapper_code['iaca_end'] = ""
9861000
for ind in range(1, len(iaca_kernels)):
9871001
iaca_kernels[ind][0] = self.get_c_code(iaca_kernels[ind][0], wrapper_code)
1002+
for ind in range(1, len(iaca_cycle_kernels)):
9881003
iaca_cycle_kernels[ind][0] = self.get_c_code(iaca_cycle_kernels[ind][0], wrapper_code)
9891004
iaca_path = path_to_iaca_file + region_name + "_" + self._kernel._md5 + ".txt"
9901005
self.build_loop_nest_reports(iaca_kernels, wrapper_code, iaca_path, compilation, extension, cppargs, ldargs, argtypes, restype, compiler)

pyop2/record.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def __init__(self,
119119
self.folds["iaca_flops"] = fold_stats
120120
# IACA reported cycles for the loop over the columns (extruded mesh)
121121
self._cycles = []
122-
self.folds["cycles"] = fold_stats
122+
self.folds["cycles"] = SUM if self._is_proc else AVG
123123

124124
def _ncalls(self):
125125
"""Number of calls per process"""
@@ -218,7 +218,7 @@ def plot_list(self, frequency):
218218
self.v_volume, self.m_volume, self.mv_volume,
219219
self.vbw, self.mbw, self.mvbw, self.rvbw,
220220
self.iaca_flops, self.papi_flops,
221-
self.iaca_mflops, self.papi_mflops, self.cycles / frequency, self.c_runtime]
221+
self.iaca_mflops, self.papi_mflops, self.cycles * 1.0 / frequency, self.c_runtime]
222222

223223
@property
224224
def name(self):
@@ -278,6 +278,8 @@ def iaca_flops(self):
278278

279279
@property
280280
def cycles(self):
281+
if not self._is_proc:
282+
return self._reduce("cycles", self._cycles)
281283
return self._reduce("cycles", self._cycles) / 1e9
282284

283285
#################################################
@@ -358,10 +360,14 @@ def cycles(self, value):
358360

359361
@property
360362
def runtime(self):
363+
if not self._is_proc:
364+
return self.end_time - self.start_time
361365
return self._ncalls() * (self.end_time - self.start_time)
362366

363367
@property
364368
def rv_runtime(self):
369+
if not self._is_proc:
370+
return self.rv_end_time - self.rv_start_time
365371
return self._ncalls() * (self.rv_end_time - self.rv_start_time)
366372

367373
@property

0 commit comments

Comments
 (0)