forked from distributed-system-analysis/smallfile
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
366 lines (326 loc) · 13.6 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
parse.py -- parses CLI commands for smallfile_cli.py
Copyright 2012 -- Ben England
Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
See Appendix on this page for instructions pertaining to license.
'''
import sys
import os
import smallfile
from smallfile import SmallfileWorkload, NOTOK
import smf_test_params
# program version string; parse() prints it in the startup banner
# ('smallfile version ...') when not running as a slave
version = '3.0' # bumped to 3 since major code changes for PEP8 and python3
# convert boolean value into 'Y' or 'N'
def bool2YN(boolval):
    """Return 'Y' when boolval is truthy, otherwise 'N'."""
    return 'Y' if boolval else 'N'
def usage(msg):  # call if CLI syntax error or invalid parameter
    """Print an error message followed by the CLI syntax summary, then exit.

    msg -- text displayed after the 'ERROR: ' prefix.

    Most defaults shown are read from a freshly constructed
    SmallfileWorkload so the help text tracks the workload defaults.
    This function never returns: it ends with sys.exit(NOTOK).
    """
    # all supported operation names, separated by '|'
    opnames = ' --operation ' + '|'.join(SmallfileWorkload.all_op_names)
    dflts = SmallfileWorkload()
    print('')
    print('ERROR: ' + msg)
    print('usage: smallfile_cli.py ')
    print(opnames)
    print(' --top top-dir | top-dir1,top-dir2,...,top-dirN (default: %s)' %
          SmallfileWorkload.tmp_dir)
    print(' --host-set h1,h2,...,hN')
    # closing parenthesis was missing from this line's default
    print(' --network-sync-dir directory-path (default: %s)' %
          os.path.join(SmallfileWorkload.tmp_dir, 'network_shared'))
    print(' --files positive-integer (default: %d)' %
          dflts.iterations)
    print(' --files-per-dir positive-integer (default: %d)' %
          dflts.files_per_dir)
    print(' --dirs-per-dir positive-integer (default: %d)' %
          dflts.dirs_per_dir)
    # thread count is not a SmallfileWorkload attribute; 2 mirrors the
    # initial value of prm_thread_count in parse()
    print(' --threads positive-integer (default: %d)' %
          2)
    print(' --record-size non-negative-integer-KB (default: %d)' %
          dflts.record_sz_kb)
    print(' --record-ctime-size (default: N)')
    print(' --xattr-size non-negative-integer-bytes (default: %d)' %
          dflts.xattr_size)
    # this is a count of attributes, not a size in bytes
    print(' --xattr-count non-negative-integer (default: %d)' %
          dflts.xattr_count)
    print(' --file-size-distribution exponential ' +
          '(default: fixed-size)')
    print(' --permute-host-dirs Y|N (default: N)')
    print(' --hash-into-dirs Y|N (default: %s)' %
          bool2YN(dflts.hash_to_dir))
    print(' --file-size non-negative-integer-KB (default: %d)' %
          dflts.total_sz_kb)
    print(' --prefix alphanumeric-string')
    print(' --suffix alphanumeric-string')
    print(' --fsync Y|N (default: %s)' %
          bool2YN(dflts.fsync))
    print(' --finish Y|N (default: %s)' %
          bool2YN(dflts.finish_all_rq))
    # report the incompressible default, not the verify_read default
    print(' --incompressible Y|N (default: %s)' %
          bool2YN(dflts.incompressible))
    print(' --verify-read Y|N (default: %s)' %
          bool2YN(dflts.verify_read))
    print(' --response-times Y|N (default: %s)' %
          bool2YN(dflts.measure_rsptimes))
    print(' --same-dir Y|N (default: %s)' %
          bool2YN(dflts.is_shared_dir))
    print(' --pause microsec (default: %d)' %
          dflts.pause_between_files)
    print(' --remote-pgm-dir directory-pathname (default: %s)' %
          os.getcwd())
    sys.exit(NOTOK)
# convert boolean command line parameter value into True/False
def str2bool(val, prmname):
    """Translate a Y/N command line value into True/False.

    val -- the value typed by the user
    prmname -- parameter name, used only in the error message

    Any value other than y, Y, n or N is reported through usage().
    """
    if val in ('y', 'Y'):
        return True
    if val in ('n', 'N'):
        return False
    usage('boolean parameter "%s" must be either Y or N' % prmname)
# ensure that input integer is non-negative
def chkNonNegInt(intval, prm):
    """Validate that intval converts to an integer >= 0.

    intval -- value to check (typically the raw command line string)
    prm -- parameter name, used only in error messages

    Returns nothing; a bad value is reported through usage().
    """
    try:
        number = int(intval)
    except ValueError:
        usage('parameter "%s" must be an integer' % prm)
    else:
        if number < 0:
            usage('integer parameter "%s" must be non-negative' % prm)
# ensure that input integer is positive
def chkPositiveInt(intval, prm):
    """Validate that intval converts to an integer > 0.

    intval -- value to check (typically the raw command line string)
    prm -- parameter name, used only in error messages

    Non-integer and negative values are rejected by chkNonNegInt();
    zero is rejected here. Problems are reported through usage().
    """
    chkNonNegInt(intval, prm)
    if not int(intval):
        usage('integer parameter "%s" must be positive' % prm)
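# return an smf_test_params object built from the command line, containing:
# list of hosts participating in test (None if --host-set was not given)
# thread count
# SmallfileWorkload instance initialized with per-thread test parameters
# remote program directory
# list of top directories
# network sync directory
# are we slave or master?
# permute host directories?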
def parse():
    """Parse sys.argv and return the resulting test parameters.

    Every parameter is a "--name value" pair. Per-thread settings are
    stored in a SmallfileWorkload instance so per-thread invocations
    inherit them; settings describing how threads/hosts cooperate are
    kept in local prm_* variables.

    Returns an smf_test_params.smf_test_params object built from the
    host set, thread count, SmallfileWorkload instance, remote program
    directory, top directories, network sync directory, slave flag and
    permute-host-dirs flag.

    Any syntax or validation error is reported through usage(). Unless
    running as a slave, the effective parameter values are printed.
    """
    # default does short test in /var/tmp so you can see the program run
    inv = SmallfileWorkload()

    # parameters that can't be stored in a SmallfileWorkload
    # describe how the SmallfileWorkload threads work together
    prm_thread_count = 2
    prm_host_set = None
    prm_slave = False
    prm_permute_host_dirs = False
    prm_remote_pgm_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
    prm_top_dirs = None
    prm_network_sync_dir = None

    # parse command line
    argc = len(sys.argv)
    # parameters passed to remote hosts if needed
    # NOTE(review): accumulated below but not consumed in this function
    pass_on_prm_list = ''
    if argc == 1:
        print('\nfor additional help add the parameter "--help" '
              'to the command\n')

    j = 1
    while j < argc:
        rawprm = sys.argv[j]
        if rawprm == '-h' or rawprm == '--help':
            usage('ok, so you need help, we all knew that ;-)')
        if rawprm[0:2] != '--':
            usage('parameter names begin with "--"')
        prm = rawprm[2:]
        # a name in the last position has no value following it
        if j + 1 >= argc:
            usage('all parameters consist of a name and a value')
        val = sys.argv[j + 1]
        if len(rawprm) < 3:
            usage('parameter name not long enough')
        pass_on_prm = rawprm + ' ' + val
        j += 2

        if prm == 'files':
            chkPositiveInt(val, rawprm)
            inv.iterations = int(val)
        elif prm == 'threads':
            chkPositiveInt(val, rawprm)
            prm_thread_count = int(val)
        elif prm == 'files-per-dir':
            chkPositiveInt(val, rawprm)
            inv.files_per_dir = int(val)
        elif prm == 'dirs-per-dir':
            chkPositiveInt(val, rawprm)
            inv.dirs_per_dir = int(val)
        elif prm == 'record-size':
            chkNonNegInt(val, rawprm)
            inv.record_sz_kb = int(val)
        elif prm == 'file-size':
            chkNonNegInt(val, rawprm)
            inv.total_sz_kb = int(val)
        elif prm == 'file-size-distribution':
            if val != 'exponential':
                usage('unrecognized file size distribution: %s' % val)
            inv.filesize_distr = \
                SmallfileWorkload.fsdistr_random_exponential
        elif prm == 'xattr-size':
            chkNonNegInt(val, rawprm)
            inv.xattr_size = int(val)
        elif prm == 'xattr-count':
            chkNonNegInt(val, rawprm)
            inv.xattr_count = int(val)
        elif prm == 'prefix':
            inv.prefix = val
        elif prm == 'suffix':
            inv.suffix = val
        elif prm == 'hash-into-dirs':
            inv.hash_to_dir = str2bool(val, rawprm)
        elif prm == 'operation':
            if val not in SmallfileWorkload.all_op_names:
                usage('unrecognized operation name: %s' % val)
            inv.opname = val
        elif prm == 'top':
            prm_top_dirs = [os.path.abspath(p) for p in val.split(',')]
            for p in prm_top_dirs:
                if not os.path.isdir(p):
                    usage('you must ensure that shared directory ' +
                          ('%s ' % p) +
                          'is accessible ' +
                          'from this host and every remote host in test')
        elif prm == 'pause':
            chkPositiveInt(val, rawprm)
            inv.pause_between_files = int(val)
        elif prm == 'stonewall':
            inv.stonewall = str2bool(val, rawprm)
        elif prm == 'finish':
            inv.finish_all_rq = str2bool(val, rawprm)
        elif prm == 'fsync':
            inv.fsync = str2bool(val, rawprm)
        elif prm == 'record-ctime-size':
            inv.record_ctime_size = str2bool(val, rawprm)
        elif prm == 'permute-host-dirs':
            prm_permute_host_dirs = str2bool(val, rawprm)
            pass_on_prm = ''
        elif prm == 'response-times':
            inv.measure_rsptimes = str2bool(val, rawprm)
        elif prm == 'incompressible':
            inv.incompressible = str2bool(val, rawprm)
        elif prm == 'verify-read':
            inv.verify_read = str2bool(val, rawprm)
        elif prm == 'same-dir':
            inv.is_shared_dir = str2bool(val, rawprm)
        elif prm == 'verbose':
            inv.verbose = str2bool(val, rawprm)
        elif prm == 'log-to-stderr':
            inv.log_to_stderr = str2bool(val, rawprm)
        elif prm == 'host-set':
            if os.path.isfile(val):
                # value names a file holding one host per line;
                # close the file promptly and ignore blank lines
                with open(val, 'r') as f:
                    prm_host_set = [record.strip() for record in f
                                    if record.strip()]
            else:
                prm_host_set = val.split(',')
                if len(prm_host_set) < 2:
                    # no comma found, try whitespace-separated list
                    prm_host_set = val.strip().split()
            if len(prm_host_set) == 0:
                usage('host list must be non-empty when ' +
                      '--host-set option used')
            pass_on_prm = ''
        elif prm == 'remote-pgm-dir':
            prm_remote_pgm_dir = val
        elif prm == 'network-sync-dir':
            prm_network_sync_dir = val
        elif prm == 'slave':
            # --slave should not be used by end-user
            prm_slave = str2bool(val, rawprm)
        elif prm == 'as-host':
            # --as-host should not be used by end-user
            inv.onhost = smallfile.get_hostname(val)
        else:
            usage('unrecognized parameter name: %s' % prm)

        # parameter options that workload generators will need
        pass_on_prm_list += ' ' + pass_on_prm

    # validate parameters further now that we know what they all are
    if inv.record_sz_kb > inv.total_sz_kb and inv.total_sz_kb != 0:
        usage('record size cannot exceed file size')

    if prm_top_dirs:
        for d in prm_top_dirs:
            if len(d) < 6:
                usage('directory less than 6 characters, ' +
                      'cannot use top of filesystem, too dangerous')
        inv.set_top(prm_top_dirs)
    else:
        prm_top_dirs = inv.top_dirs

    if prm_network_sync_dir:
        inv.network_dir = prm_network_sync_dir
    else:
        prm_network_sync_dir = inv.network_dir
    inv.starting_gate = os.path.join(inv.network_dir, 'starting_gate.tmp')

    # stonewalling is switched off for runs of fewer than 10 files
    if inv.iterations < 10:
        inv.stonewall = False

    # display results of parse so user knows what default values are
    # most important parameters come first
    # display host set first because this can be very long,
    # this way the rest of the parameters appear together on the screen
    size_distribution_string = 'fixed'
    if inv.filesize_distr == SmallfileWorkload.fsdistr_random_exponential:
        size_distribution_string = 'random exponential'

    prm_list = [
        ('hosts in test', '%s' % prm_host_set),
        ('top test directory(s)', str(prm_top_dirs)),
        ('operation', inv.opname),
        ('files/thread', '%d' % inv.iterations),
        ('threads', '%d' % prm_thread_count),
        ('record size (KB, 0 = maximum)', '%d' % inv.record_sz_kb),
        ('file size (KB)', '%d' % inv.total_sz_kb),
        ('file size distribution', size_distribution_string),
        ('files per dir', '%d' % inv.files_per_dir),
        ('dirs per dir', '%d' % inv.dirs_per_dir),
        ('threads share directories?', '%s'
         % bool2YN(inv.is_shared_dir)),
        ('filename prefix', inv.prefix),
        ('filename suffix', inv.suffix),
        ('hash file number into dir.?', bool2YN(inv.hash_to_dir)),
        ('fsync after modify?', bool2YN(inv.fsync)),
        ('pause between files (microsec)', '%d'
         % inv.pause_between_files),
        ('finish all requests?', '%s' % bool2YN(inv.finish_all_rq)),
        ('stonewall?', '%s' % bool2YN(inv.stonewall)),
        ('measure response times?', '%s'
         % bool2YN(inv.measure_rsptimes)),
        ('verify read?', '%s' % bool2YN(inv.verify_read)),
        ('verbose?', inv.verbose),
        ('log to stderr?', inv.log_to_stderr),
    ]
    if smallfile.xattr_installed:
        prm_list.extend([('ext.attr.size', '%d' % inv.xattr_size),
                         ('ext.attr.count', '%d' % inv.xattr_count)])
    # multi-host settings are only listed when a host set was given
    if prm_host_set:
        prm_list.extend([('permute host directories?', '%s'
                          % bool2YN(prm_permute_host_dirs))])
        if prm_remote_pgm_dir:
            prm_list.append(('remote program directory',
                             prm_remote_pgm_dir))
        if prm_network_sync_dir:
            prm_list.append(('network thread sync. dir.',
                             prm_network_sync_dir))

    if inv.record_sz_kb == 0 and inv.verbose:
        print(('record size not specified, ' +
               'large files will default to record size %d KB') %
              (SmallfileWorkload.biggest_buf_size / inv.BYTES_PER_KB))

    # the banner and parameter table are skipped when running as a slave
    if not prm_slave:
        print('smallfile version %s' % version)
        for (prm_name, prm_value) in prm_list:
            print('%40s : %s' % (prm_name, prm_value))

    # construct command to run remote slave process using CLI parameters
    if not prm_remote_pgm_dir:
        prm_remote_pgm_dir = os.getcwd()

    # "inv" contains all per-thread parameters
    params = smf_test_params.smf_test_params(
        prm_host_set,
        prm_thread_count,
        inv,
        prm_remote_pgm_dir,
        prm_top_dirs,
        prm_network_sync_dir,
        prm_slave,
        prm_permute_host_dirs,
    )
    return params