Skip to content

Commit

Permalink
distributions: Extend flexibility of non-uniform random distributions
Browse files Browse the repository at this point in the history
This change affects options random_distribution and file_service_type.
For the pareto, zipf and gauss distributions, a concept of `center` is implemented.
It allows fixing in place the value that is most likely to be accessed.
Example:
fio --randseed=1 --ioengine=libaio --rw=randwrite --nrfiles=16 --bs=4k \
    --size=256m --allow_file_create=1 --write_iolog=log.txt \
    --file_service_type=gauss:10:0.1 --filename_format=object.\$filenum --name=x

cat log.txt |grep write |cut -f 1 -d " " |sort |uniq -c | sort -n | \
sed "s/[.]/ /" | while read a b c; do echo $c $b $a; done |sort -n
0 object 13429
1 object 17928
2 object 14724
3 object 7845
4 object 2476
5 object 468
6 object 44
7 object 3
12 object 24
13 object 318
14 object 1795
15 object 6482

Signed-off-by: Adam Kupczyk <[email protected]>
  • Loading branch information
aclamk committed Jan 11, 2021
1 parent 674428a commit a87c90f
Show file tree
Hide file tree
Showing 14 changed files with 89 additions and 24 deletions.
10 changes: 9 additions & 1 deletion HOWTO
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ I/O type
limit reads or writes to a certain rate. If that is the case, then the
distribution may be skewed. Default: 50.

.. option:: random_distribution=str:float[,str:float][,str:float]
.. option:: random_distribution=str:float[:float][,str:float][,str:float]

By default, fio will use a completely uniform random distribution when asked
to perform random I/O. Sometimes it is useful to skew the distribution in
Expand Down Expand Up @@ -1396,6 +1396,14 @@ I/O type
map. For the **normal** distribution, a normal (Gaussian) deviation is
supplied as a value between 0 and 100.

The second, optional float is allowed for **pareto**, **zipf** and **normal** distributions.
It allows setting the base of the distribution at a non-default place, giving more control
over the most probable outcome. This value is in the range [0-1], which maps linearly to
the range of possible random values.
Defaults are: random for **pareto** and **zipf**, and 0.5 for **normal**.
If you wanted to use **zipf** with a `theta` of 1.2 centered on 1/4 of the allowed value range,
you would use ``random_distribution=zipf:1.2:0.25``.

For a **zoned** distribution, fio supports specifying percentages of I/O
access that should fall within what range of the file or device. For
example, given a criteria of:
Expand Down
2 changes: 2 additions & 0 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
o->zipf_theta.u.f = fio_uint64_to_double(le64_to_cpu(top->zipf_theta.u.i));
o->pareto_h.u.f = fio_uint64_to_double(le64_to_cpu(top->pareto_h.u.i));
o->gauss_dev.u.f = fio_uint64_to_double(le64_to_cpu(top->gauss_dev.u.i));
o->random_center.u.f = fio_uint64_to_double(le64_to_cpu(top->random_center.u.i));
o->random_generator = le32_to_cpu(top->random_generator);
o->hugepage_size = le32_to_cpu(top->hugepage_size);
o->rw_min_bs = le64_to_cpu(top->rw_min_bs);
Expand Down Expand Up @@ -423,6 +424,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->zipf_theta.u.i = __cpu_to_le64(fio_double_to_uint64(o->zipf_theta.u.f));
top->pareto_h.u.i = __cpu_to_le64(fio_double_to_uint64(o->pareto_h.u.f));
top->gauss_dev.u.i = __cpu_to_le64(fio_double_to_uint64(o->gauss_dev.u.f));
top->random_center.u.i = __cpu_to_le64(fio_double_to_uint64(o->random_center.u.f));
top->random_generator = cpu_to_le32(o->random_generator);
top->hugepage_size = cpu_to_le32(o->hugepage_size);
top->rw_min_bs = __cpu_to_le64(o->rw_min_bs);
Expand Down
6 changes: 3 additions & 3 deletions filesetup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1319,11 +1319,11 @@ static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
seed = td->rand_seeds[4];

if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed);
zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, td->o.random_center.u.f, seed);
else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed);
pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, td->o.random_center.u.f, seed);
else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS)
gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, seed);
gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, td->o.random_center.u.f, seed);
}

static bool init_rand_distribution(struct thread_data *td)
Expand Down
10 changes: 9 additions & 1 deletion fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ first. This may interfere with a given rate setting, if fio is asked to
limit reads or writes to a certain rate. If that is the case, then the
distribution may be skewed. Default: 50.
.TP
.BI random_distribution \fR=\fPstr:float[,str:float][,str:float]
.BI random_distribution \fR=\fPstr:float[:float][,str:float][,str:float]
By default, fio will use a completely uniform random distribution when asked
to perform random I/O. Sometimes it is useful to skew the distribution in
specific ways, ensuring that some parts of the data is more hot than others.
Expand Down Expand Up @@ -1168,6 +1168,14 @@ option. If a non\-uniform model is used, fio will disable use of the random
map. For the \fBnormal\fR distribution, a normal (Gaussian) deviation is
supplied as a value between 0 and 100.
.P
The second, optional float is allowed for \fBpareto\fR, \fBzipf\fR and \fBnormal\fR
distributions. It allows setting the base of the distribution at a non-default place, giving
more control over the most probable outcome. This value is in the range [0-1], which maps
linearly to the range of possible random values.
Defaults are: random for \fBpareto\fR and \fBzipf\fR, and 0.5 for \fBnormal\fR.
If you wanted to use \fBzipf\fR with a `theta` of 1.2 centered on 1/4 of the allowed value range,
you would use `random_distribution=zipf:1.2:0.25`.
.P
For a \fBzoned\fR distribution, fio supports specifying percentages of I/O
access that should fall within what range of the file or device. For
example, given a criteria of:
Expand Down
1 change: 1 addition & 0 deletions fio.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ struct thread_data {
double pareto_h;
double gauss_dev;
};
double random_center;
int error;
int sig;
int done;
Expand Down
6 changes: 3 additions & 3 deletions init.c
Original file line number Diff line number Diff line change
Expand Up @@ -971,13 +971,13 @@ static void init_rand_file_service(struct thread_data *td)
const unsigned int seed = td->rand_seeds[FIO_RAND_FILE_OFF];

if (td->o.file_service_type == FIO_FSERVICE_ZIPF) {
zipf_init(&td->next_file_zipf, nranges, td->zipf_theta, seed);
zipf_init(&td->next_file_zipf, nranges, td->zipf_theta, td->random_center, seed);
zipf_disable_hash(&td->next_file_zipf);
} else if (td->o.file_service_type == FIO_FSERVICE_PARETO) {
pareto_init(&td->next_file_zipf, nranges, td->pareto_h, seed);
pareto_init(&td->next_file_zipf, nranges, td->pareto_h, td->random_center, seed);
zipf_disable_hash(&td->next_file_zipf);
} else if (td->o.file_service_type == FIO_FSERVICE_GAUSS) {
gauss_init(&td->next_file_gauss, nranges, td->gauss_dev, seed);
gauss_init(&td->next_file_gauss, nranges, td->gauss_dev, td->random_center, seed);
gauss_disable_hash(&td->next_file_gauss);
}
}
Expand Down
8 changes: 6 additions & 2 deletions lib/gauss.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ unsigned long long gauss_next(struct gauss_state *gs)
if (!gs->disable_hash)
sum = __hash_u64(sum);

return sum % gs->nranges;
return (sum + gs->rand_off) % gs->nranges;
}

void gauss_init(struct gauss_state *gs, unsigned long nranges, double dev,
unsigned int seed)
double center, unsigned int seed)
{
memset(gs, 0, sizeof(*gs));
init_rand_seed(&gs->r, seed, 0);
Expand All @@ -55,6 +55,10 @@ void gauss_init(struct gauss_state *gs, unsigned long nranges, double dev,
if (gs->stddev > nranges / 2)
gs->stddev = nranges / 2;
}
if (center == -1)
gs->rand_off = 0;
else
gs->rand_off = nranges * (center - 0.5);
}

void gauss_disable_hash(struct gauss_state *gs)
Expand Down
3 changes: 2 additions & 1 deletion lib/gauss.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ struct gauss_state {
struct frand_state r;
uint64_t nranges;
unsigned int stddev;
unsigned int rand_off;
bool disable_hash;
};

void gauss_init(struct gauss_state *gs, unsigned long nranges, double dev,
unsigned int seed);
double center, unsigned int seed);
unsigned long long gauss_next(struct gauss_state *gs);
void gauss_disable_hash(struct gauss_state *gs);

Expand Down
12 changes: 7 additions & 5 deletions lib/zipf.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,21 @@ static void zipf_update(struct zipf_state *zs)
}

static void shared_rand_init(struct zipf_state *zs, uint64_t nranges,
unsigned int seed)
double center, unsigned int seed)
{
memset(zs, 0, sizeof(*zs));
zs->nranges = nranges;

init_rand_seed(&zs->rand, seed, 0);
zs->rand_off = __rand(&zs->rand);
if (center != -1)
zs->rand_off = nranges * center;
}

void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta,
unsigned int seed)
double center, unsigned int seed)
{
shared_rand_init(zs, nranges, seed);
shared_rand_init(zs, nranges, center, seed);

zs->theta = theta;
zs->zeta2 = pow(1.0, zs->theta) + pow(0.5, zs->theta);
Expand Down Expand Up @@ -71,9 +73,9 @@ uint64_t zipf_next(struct zipf_state *zs)
}

void pareto_init(struct zipf_state *zs, uint64_t nranges, double h,
unsigned int seed)
double center, unsigned int seed)
{
shared_rand_init(zs, nranges, seed);
shared_rand_init(zs, nranges, center, seed);
zs->pareto_pow = log(h) / log(1.0 - h);
}

Expand Down
6 changes: 4 additions & 2 deletions lib/zipf.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ struct zipf_state {
bool disable_hash;
};

void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, unsigned int seed);
void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta,
double center, unsigned int seed);
uint64_t zipf_next(struct zipf_state *zs);

void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, unsigned int seed);
void pareto_init(struct zipf_state *zs, uint64_t nranges, double h,
double center, unsigned int seed);
uint64_t pareto_next(struct zipf_state *zs);
void zipf_disable_hash(struct zipf_state *zs);

Expand Down
39 changes: 37 additions & 2 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,27 @@ static char *get_opt_postfix(const char *str)
return strdup(p);
}

/*
 * Parse a distribution postfix of the form "value[:center]".
 *
 * The input is duplicated so it can be split in place at the first ':'.
 * When a ':' is present, the text after it is converted into *center
 * first, and *val is only converted (from the text before the ':') if
 * that conversion succeeded. Without a ':' the whole string is converted
 * into *val and *center is not written by this function.
 *
 * Returns true when every conversion that was attempted succeeded, and
 * false otherwise, including when the duplicate could not be allocated.
 */
static bool split_parse_distr(const char *str, double *val, double *center)
{
	char *copy, *sep;
	bool ok = false;

	copy = strdup(str);
	if (!copy)
		return false;

	sep = strchr(copy, ':');
	if (sep) {
		/* cut the value part off here; the center text follows the ':' */
		*sep = '\0';
		if (!str_to_float(sep + 1, center, 0))
			goto out;
	}

	ok = str_to_float(copy, val, 0);
out:
	free(copy);
	return ok;
}

static int bs_cmp(const void *p1, const void *p2)
{
const struct bssplit *bsp1 = p1;
Expand Down Expand Up @@ -787,6 +808,7 @@ static int str_fst_cb(void *data, const char *str)
{
struct thread_data *td = cb_data_to_td(data);
double val;
double center = -1;
bool done = false;
char *nr;

Expand Down Expand Up @@ -821,14 +843,20 @@ static int str_fst_cb(void *data, const char *str)
return 0;

nr = get_opt_postfix(str);
if (nr && !str_to_float(nr, &val, 0)) {
if (nr && !split_parse_distr(nr, &val, &center)) {
log_err("fio: file service type random postfix parsing failed\n");
free(nr);
return 1;
}

free(nr);

if (center != -1 && (center < 0.00 || center > 1.00)) {
log_err("fio: distribution center out of range (0 <= center <= 1.0)\n");
return 1;
}
td->random_center = center;

switch (td->o.file_service_type) {
case FIO_FSERVICE_ZIPF:
if (val == 1.00) {
Expand Down Expand Up @@ -1030,6 +1058,7 @@ static int str_random_distribution_cb(void *data, const char *str)
{
struct thread_data *td = cb_data_to_td(data);
double val;
double center = -1;
char *nr;

if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
Expand All @@ -1046,14 +1075,20 @@ static int str_random_distribution_cb(void *data, const char *str)
return 0;

nr = get_opt_postfix(str);
if (nr && !str_to_float(nr, &val, 0)) {
if (nr && !split_parse_distr(nr, &val, &center)) {
log_err("fio: random postfix parsing failed\n");
free(nr);
return 1;
}

free(nr);

if (center != -1 && (center < 0.00 || center > 1.00)) {
log_err("fio: distribution center out of range (0 <= center <= 1.0)\n");
return 1;
}
td->o.random_center.u.f = center;

if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) {
if (val == 1.00) {
log_err("fio: zipf theta must different than 1.0\n");
Expand Down
2 changes: 1 addition & 1 deletion server.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
};

enum {
FIO_SERVER_VER = 86,
FIO_SERVER_VER = 87,

FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
Expand Down
6 changes: 3 additions & 3 deletions t/genzipf.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,11 +297,11 @@ int main(int argc, char *argv[])
nranges /= block_size;

if (dist_type == TYPE_ZIPF)
zipf_init(&zs, nranges, dist_val, 1);
zipf_init(&zs, nranges, dist_val, -1, 1);
else if (dist_type == TYPE_PARETO)
pareto_init(&zs, nranges, dist_val, 1);
pareto_init(&zs, nranges, dist_val, -1, 1);
else
gauss_init(&gs, nranges, dist_val, 1);
gauss_init(&gs, nranges, dist_val, -1, 1);

hash_bits = 0;
hash_size = nranges;
Expand Down
2 changes: 2 additions & 0 deletions thread_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct thread_options {
fio_fp64_t zipf_theta;
fio_fp64_t pareto_h;
fio_fp64_t gauss_dev;
fio_fp64_t random_center;

unsigned int random_generator;

Expand Down Expand Up @@ -467,6 +468,7 @@ struct thread_options_pack {
fio_fp64_t zipf_theta;
fio_fp64_t pareto_h;
fio_fp64_t gauss_dev;
fio_fp64_t random_center;

uint32_t random_generator;

Expand Down

0 comments on commit a87c90f

Please sign in to comment.