-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathplot_scan.py
executable file
·173 lines (148 loc) · 5.32 KB
/
plot_scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#! /usr/bin/env python3
import sys
import os.path
import argparse
# Command line interface.  The positional FILE arguments and the -o / -z
# targets are opened by argparse itself (argparse.FileType), so after parsing
# they are file objects rather than path strings.
parser = argparse.ArgumentParser(
    description='Plot the data points in one or more CSV files produced by the "scan" script.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# input: one or more CSV files, opened for reading in text mode
parser.add_argument(
    'files',
    metavar='FILE',
    nargs='+',
    type=argparse.FileType('r'),
    help='data files to plot',
)

# output: the main plot, opened for unbuffered binary writing
parser.add_argument(
    '-o', '--output',
    metavar='FILE',
    default='plot.png',
    type=argparse.FileType('wb', 0),
    help='save the plot to FILE',
)

# optional second plot; the attribute is absent from the parsed namespace
# unless -z is given (default is SUPPRESS), and a bare -z selects 'zoom.png'
parser.add_argument(
    '-z', '--zoom',
    metavar='FILE',
    default=argparse.SUPPRESS,
    nargs='?',
    const='zoom.png',
    type=argparse.FileType('wb', 0),
    help='produce a zoomed-in version of the plot and save it to FILE, or zoom.png if FILE is omitted (default: do not produce a zoomed plot)',
)

# content of the plot
parser.add_argument(
    '-n', '--normalise',
    action='store_true',
    default=False,
    help='plot the average throughput per job instead of the total throughput across all jobs',
)
parser.add_argument(
    '-x', '--x-axis',
    choices=['CPU threads per job', 'CPU threads', 'EDM streams per job', 'EDM streams'],
    default='EDM streams',
    help='plot vs the number of CPU threads or EDM streams, overall or per job',
)

# legend entries, matched to the input files by position
parser.add_argument(
    '-l', '--labels',
    dest='labels',
    nargs='+',
    default=None,
    help='list of labels to display in the legend. The default is None, which means extract the legend names from the filenames.',
)

args = parser.parse_args()
import numpy as np
import pandas as pd
import matplotlib as mpl
mpl.use('agg')
import seaborn as sns
# Plot content options.
# NOTE(review): nothing else in this script reads this dictionary; the values
# actually used come from the --normalise and --x-axis command line options.
options = {
    # True: plot the average throughput per job, False: plot the total throughput
    'normalise': False,
    # one of 'CPU threads per job', 'CPU threads', 'EDM streams per job', 'EDM streams'
    'x axis': 'EDM streams',
}
# workaround for seaborn 0.9.0
def fix_plot_range(plot, zoom = False):
    """Reset the axis limits of `plot` around its X data.

    The X axis is set to the range of the plotted X values, widened on each
    side by 5% of that range.  Unless `zoom` is true, the lower Y limit is
    also pinned to 0 while the upper limit is left unset (None).

    plot -- object exposing `data`, `_x_var` and `set(...)`; here, the grid
            returned by seaborn's lmplot
    zoom -- when true, leave the Y axis limits untouched
    """
    x_values = plot.data[plot._x_var]
    lowest, highest = min(x_values), max(x_values)
    margin = (highest - lowest) * 0.05
    plot.set(xlim=(lowest - margin, highest + margin))
    if zoom:
        return
    plot.set(ylim=(0, None))
# Global look of the plots: a style dictionary based on seaborn's 'whitegrid',
# followed by a five-colour palette named after ROOT colours.  The style is
# applied first and the palette second, in the same order as before.
plot_style = {
    # axes and figure
    'axes.axisbelow': True,
    'axes.edgecolor': '.15',    # .8
    'axes.facecolor': 'white',
    'axes.grid': True,
    'axes.labelcolor': '.15',
    'axes.linewidth': 1,
    'figure.facecolor': 'white',
    # fonts
    'font.family': ['sans-serif'],
    'font.sans-serif': ['Arial', 'DejaVu Sans', 'Liberation Sans', 'Bitstream Vera Sans', 'sans-serif'],
    # grid
    'grid.color': '.8',
    'grid.linestyle': '-',
    # images, legend, lines and text
    'image.cmap': 'rocket',
    'legend.frameon': False,
    'legend.numpoints': 1,
    'legend.scatterpoints': 1,
    'lines.solid_capstyle': 'round',
    'text.color': '.15',
    # ticks
    'xtick.color': '.15',
    'xtick.direction': 'out',
    'xtick.major.size': 0,
    'xtick.minor.size': 0,
    'ytick.color': '.15',
    'ytick.direction': 'out',
    'ytick.major.size': 0,
    'ytick.minor.size': 0,
}
sns.set(style=plot_style)

root_colours = [
    (0., 0., 1.),       # ROOT kBlue
    (1., 0., 0.),       # ROOT kRed
    (0., 0., 0.),       # ROOT kBlack
    (1., 0.4, 0.),      # ROOT kOrange +7
    (0.8, 0.2, 0.8),    # ROOT kMagenta -3
]
sns.set_palette(root_colours, n_colors=len(root_colours))
# Read every input file into a DataFrame carrying a 'name' column (used later
# as the hue / legend category), then merge them into the single DataFrame `df`.
#
# expected file format:
#   jobs, overlap, CPU threads per job, EDM streams per job, GPUs per jobs, number of events, average throughput (ev/s), uncertainty (ev/s)
#   2, 0.994863, 6, 6, 1, 4000, 3591.314398, 1.665309
#   ...

# exactly one label per input file is required; check once, before reading
# anything, and exit with status 1 and the message on stderr otherwise
if args.labels and len(args.labels) != len(args.files):
    sys.exit("Labels mismatch with input filenames {}".format(args.labels))

data = []
for label_idx, file in enumerate(args.files):
    # the handle was opened by argparse: close it as soon as it has been read
    with file:
        # strip the blanks that pad the column names in the header
        values = pd.read_csv(file).rename(columns=lambda x: x.strip())

    if args.labels:
        # an explicit label replaces any 'name' column already in the file,
        # instead of adding a second column with the same name
        if 'name' in values:
            values['name'] = args.labels[label_idx]
        else:
            values.insert(0, 'name', args.labels[label_idx])
    elif 'name' not in values:
        # if the data does not have a name, build it from the file name
        # without its directory and last extension
        name = os.path.splitext(os.path.basename(file.name))[0]
        values.insert(0, 'name', name)

    # append each file exactly once, whichever branch provided the name
    data.append(values)

df = pd.concat(data, ignore_index = True)
del data
# normalise to the number of jobs: with --normalise the throughput and its
# uncertainty are divided by the number of jobs
if args.normalise:
    for column in ('average throughput (ev/s)', 'uncertainty (ev/s)'):
        df[column] /= df['jobs']

# compute the total number of CPU threads and EDM streams across all jobs
for total, per_job in (
    ('CPU threads', 'CPU threads per job'),
    ('EDM streams', 'EDM streams per job'),
):
    df[total] = df[per_job] * df['jobs']
# One marker per distinct 'name': the second element cycles through 0, 1, 2
# and the first one through 4, 5, 6 (times `sides`), so the tuples repeat
# after nine data sets.
# NOTE(review): presumably read by matplotlib as (numsides, style, angle) - confirm.
sides = 1
markers = [
    (sides * (n // 3 % 3 + 4), n % 3, 0)
    for n in range(len(df['name'].unique()))
]
print(markers)

plot = sns.lmplot(
    # what to plot
    data=df,
    x=args.x_axis,
    y='average throughput (ev/s)',
    hue='name',                 # different categories
    markers=markers,
    scatter_kws={"s": 80},
    # regression
    fit_reg=True,               # estimate and plot a regression model
    order=4,                    # polynomial fit
    ci=95.0,
    truncate=False,
    # geometry
    height=5.4,                 # plot height in inches, at 100 dpi
    aspect=16.0 / 9.0,          # plot aspect ratio
    # legend
    legend=True,
    legend_out=True,            # show the legend to the right of the plot
)
# NOTE(review): both targets are file objects opened by argparse, not paths, so
# the image format presumably cannot be inferred from the file name - confirm.

# zoomed-in version of the plot: only produced when -z/--zoom was given, and
# saved before the Y axis is modified below
if hasattr(args, 'zoom'):
    fix_plot_range(plot, zoom=True)
    plot.savefig(args.zoom)

# full Y axis
fix_plot_range(plot, zoom=False)
plot.savefig(args.output)