Skip to content

Commit

Permalink
Pre-load and sort random blocks for pure read verify workloads
Browse files Browse the repository at this point in the history
If fio is run with a write phase followed by a read phase and the
IO type is random, the final read verify phase gets sorted
blocks to read back. But if fio is asked to verify something
that was previously randomly written, it generates the
same random offsets in random order and verifies those. This is
usually much slower than a sorted read back.

So add a verifysort_nr option that allows the user to specify
a number of random offsets to pre-generate and sort, before
reading them and verifying the contents. This greatly speeds
up pure read verify workloads.

Default to 1024, and put a max of 64K entries on the option.
We do a merge list sort on the entries, so we don't want a
huge amount of backlog.

Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
axboe committed Jan 12, 2013
1 parent 7477673 commit 1ae83d4
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \
engines/mmap.c engines/sync.c engines/null.c engines/net.c \
memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
json.c lib/zipf.c lib/axmap.c lib/lfsr.c gettime-thread.c \
helpers.c
helpers.c lib/flist_sort.c

ifdef CONFIG_64BIT
CFLAGS += -DBITS_PER_LONG=64
Expand Down
1 change: 1 addition & 0 deletions backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,7 @@ static void *thread_main(void *data)
INIT_FLIST_HEAD(&td->io_hist_list);
INIT_FLIST_HEAD(&td->verify_list);
INIT_FLIST_HEAD(&td->trim_list);
INIT_FLIST_HEAD(&td->next_rand_list);
pthread_mutex_init(&td->io_u_lock, NULL);
td->io_hist_tree = RB_ROOT;

Expand Down
3 changes: 3 additions & 0 deletions fio.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ struct thread_options {
unsigned int verify;
unsigned int do_verify;
unsigned int verifysort;
unsigned int verifysort_nr;
unsigned int verify_interval;
unsigned int verify_offset;
char verify_pattern[MAX_PATTERN_SIZE];
Expand Down Expand Up @@ -515,6 +516,8 @@ struct thread_data {
struct flist_head trim_list;
unsigned long trim_entries;

struct flist_head next_rand_list;

/*
* for fileservice, how often to switch to a new file
*/
Expand Down
3 changes: 3 additions & 0 deletions flist.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,7 @@ static inline void flist_splice_init(struct flist_head *list,
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)

extern void flist_sort(void *priv, struct flist_head *head,
int (*cmp)(void *priv, struct flist_head *a, struct flist_head *b));

#endif
100 changes: 79 additions & 21 deletions io_u.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct io_completion_data {
* The ->io_axmap contains a map of blocks we have or have not done io
* to yet. Used to make sure we cover the entire range in a fair fashion.
*/
static int random_map_free(struct fio_file *f, const unsigned long long block)
static int random_map_free(struct fio_file *f, const uint64_t block)
{
return !axmap_isset(f->io_axmap, block);
}
Expand All @@ -36,10 +36,10 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
unsigned int min_bs = td->o.rw_min_bs;
struct fio_file *f = io_u->file;
unsigned long long block;
unsigned int nr_blocks;
uint64_t block;

block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;

if (!(io_u->flags & IO_U_F_BUSY_OK))
Expand Down Expand Up @@ -67,24 +67,29 @@ static uint64_t last_block(struct thread_data *td, struct fio_file *f,
if (td->o.zone_range)
max_size = td->o.zone_range;

max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
max_blocks = max_size / (uint64_t) td->o.ba[ddir];
if (!max_blocks)
return 0;

return max_blocks;
}

struct rand_off {
struct flist_head list;
uint64_t off;
};

static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
enum fio_ddir ddir, unsigned long long *b)
enum fio_ddir ddir, uint64_t *b)
{
unsigned long long r, lastb;
uint64_t r, lastb;

lastb = last_block(td, f, ddir);
if (!lastb)
return 1;

if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE) {
unsigned long long rmax;
uint64_t rmax;

rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX;

Expand All @@ -98,7 +103,7 @@ static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,

dprint(FD_RANDOM, "off rand %llu\n", r);

*b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
*b = (lastb - 1) * (r / ((uint64_t) rmax + 1.0));
} else {
uint64_t off = 0;

Expand Down Expand Up @@ -131,22 +136,30 @@ static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,

static int __get_next_rand_offset_zipf(struct thread_data *td,
struct fio_file *f, enum fio_ddir ddir,
unsigned long long *b)
uint64_t *b)
{
*b = zipf_next(&f->zipf);
return 0;
}

static int __get_next_rand_offset_pareto(struct thread_data *td,
struct fio_file *f, enum fio_ddir ddir,
unsigned long long *b)
uint64_t *b)
{
*b = pareto_next(&f->zipf);
return 0;
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
enum fio_ddir ddir, unsigned long long *b)
/*
 * Comparison callback for flist_sort(): orders rand_off entries by
 * ascending block offset. The offsets are 64-bit, so returning the raw
 * difference would truncate to int and could produce the wrong sign for
 * widely spaced offsets; compare explicitly instead.
 */
static int flist_cmp(void *data, struct flist_head *a, struct flist_head *b)
{
	struct rand_off *r1 = flist_entry(a, struct rand_off, list);
	struct rand_off *r2 = flist_entry(b, struct rand_off, list);

	if (r1->off < r2->off)
		return -1;
	if (r1->off > r2->off)
		return 1;
	return 0;
}

static int get_off_from_method(struct thread_data *td, struct fio_file *f,
enum fio_ddir ddir, uint64_t *b)
{
if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
return __get_next_rand_offset(td, f, ddir, b);
Expand All @@ -159,8 +172,52 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
return 1;
}

/*
 * Store the next random block number in *b.
 *
 * For a pure random read verify pass (READ direction, verify enabled,
 * random IO) with no stored write metadata, pre-generate up to
 * o.verifysort_nr offsets, sort them with flist_sort(), and hand them
 * back in ascending order — a sorted read back is much faster than
 * seeking in random order. In every other case, fall through to the
 * plain random offset generator.
 *
 * Returns 0 on success, non-zero when no offset could be generated.
 */
static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, uint64_t *b)
{
	struct rand_off *r;
	int i, ret = 1;

	/*
	 * If sort not enabled, or not a pure random read workload without
	 * any stored write metadata, just return a random offset
	 */
	if (!td->o.verifysort_nr || !(ddir == READ && td->o.do_verify &&
	    td->o.verify != VERIFY_NONE && td_random(td)))
		return get_off_from_method(td, f, ddir, b);

	if (!flist_empty(&td->next_rand_list)) {
fetch:
		/* Pop the smallest pre-sorted offset off the list */
		r = flist_entry(td->next_rand_list.next, struct rand_off, list);
		flist_del(&r->list);
		*b = r->off;
		free(r);
		return 0;
	}

	/* Batch up to verifysort_nr offsets, then sort them below */
	for (i = 0; i < td->o.verifysort_nr; i++) {
		r = malloc(sizeof(*r));
		if (!r)
			break;

		ret = get_off_from_method(td, f, ddir, &r->off);
		if (ret) {
			free(r);
			break;
		}

		flist_add(&r->list, &td->next_rand_list);
	}

	/* Nothing generated at all: propagate the failure */
	if (ret && !i)
		return ret;

	assert(!flist_empty(&td->next_rand_list));
	flist_sort(NULL, &td->next_rand_list, flist_cmp);
	goto fetch;
}

static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
enum fio_ddir ddir, unsigned long long *b)
enum fio_ddir ddir, uint64_t *b)
{
if (!get_next_rand_offset(td, f, ddir, b))
return 0;
Expand All @@ -177,15 +234,15 @@ static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
}

static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
enum fio_ddir ddir, unsigned long long *offset)
enum fio_ddir ddir, uint64_t *offset)
{
assert(ddir_rw(ddir));

if (f->last_pos >= f->io_size + get_start_offset(td) && td->o.time_based)
f->last_pos = f->last_pos - f->io_size;

if (f->last_pos < f->real_file_size) {
unsigned long long pos;
uint64_t pos;

if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0)
f->last_pos = f->real_file_size;
Expand All @@ -205,7 +262,7 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u,
enum fio_ddir ddir, int rw_seq)
{
struct fio_file *f = io_u->file;
unsigned long long b, offset;
uint64_t b, offset;
int ret;

assert(ddir_rw(ddir));
Expand Down Expand Up @@ -1106,7 +1163,7 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u)
static void small_content_scramble(struct io_u *io_u)
{
unsigned int i, nr_blocks = io_u->buflen / 512;
unsigned long long boffset;
uint64_t boffset;
unsigned int offset;
void *p, *end;

Expand All @@ -1124,9 +1181,9 @@ static void small_content_scramble(struct io_u *io_u)
* and the actual offset.
*/
offset = (io_u->start_time.tv_usec ^ boffset) & 511;
offset &= ~(sizeof(unsigned long long) - 1);
if (offset >= 512 - sizeof(unsigned long long))
offset -= sizeof(unsigned long long);
offset &= ~(sizeof(uint64_t) - 1);
if (offset >= 512 - sizeof(uint64_t))
offset -= sizeof(uint64_t);
memcpy(p + offset, &boffset, sizeof(boffset));

end = p + 512 - sizeof(io_u->start_time);
Expand Down Expand Up @@ -1285,7 +1342,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,

static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
{
unsigned long long secs, remainder, bps, bytes;
uint64_t secs, remainder, bps, bytes;

bytes = td->this_io_bytes[ddir];
bps = td->rate_bps[ddir];
secs = bytes / bps;
Expand Down
140 changes: 140 additions & 0 deletions lib/flist_sort.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include <stdio.h>
#include <string.h>
#include "../flist.h"
#include "../log.h"

#define MAX_LIST_LENGTH_BITS 20

/*
* Returns a list organized in an intermediate format suited
* to chaining of merge() calls: null-terminated, no reserved or
* sentinel head node, "prev" links not maintained.
*/
static struct flist_head *merge(void *priv,
int (*cmp)(void *priv, struct flist_head *a,
struct flist_head *b),
struct flist_head *a, struct flist_head *b)
{
struct flist_head head, *tail = &head;

while (a && b) {
/* if equal, take 'a' -- important for sort stability */
if ((*cmp)(priv, a, b) <= 0) {
tail->next = a;
a = a->next;
} else {
tail->next = b;
b = b->next;
}
tail = tail->next;
}
tail->next = a?:b;
return head.next;
}

/*
* Combine final list merge with restoration of standard doubly-linked
* list structure. This approach duplicates code from merge(), but
* runs faster than the tidier alternatives of either a separate final
* prev-link restoration pass, or maintaining the prev links
* throughout.
*/
static void merge_and_restore_back_links(void *priv,
		int (*cmp)(void *priv, struct flist_head *a,
			struct flist_head *b),
		struct flist_head *head,
		struct flist_head *a, struct flist_head *b)
{
	struct flist_head *tail = head;

	while (a && b) {
		/* if equal, take 'a' -- important for sort stability */
		if ((*cmp)(priv, a, b) <= 0) {
			tail->next = a;
			a->prev = tail;
			a = a->next;
		} else {
			tail->next = b;
			b->prev = tail;
			b = b->next;
		}
		tail = tail->next;
	}
	/* One side is exhausted; splice the non-empty remainder on */
	tail->next = a ? : b;

	do {
		/*
		 * In worst cases this loop may run many iterations.
		 * Continue callbacks to the client even though no
		 * element comparison is needed, so the client's cmp()
		 * routine can invoke cond_resched() periodically.
		 */
		(*cmp)(priv, tail->next, tail->next);

		/* Walk the remainder, restoring each ->prev link */
		tail->next->prev = tail;
		tail = tail->next;
	} while (tail->next);

	/* Close the circular doubly-linked list back onto the head node */
	tail->next = head;
	head->prev = tail;
}

/**
* list_sort - sort a list
* @priv: private data, opaque to list_sort(), passed to @cmp
* @head: the list to sort
* @cmp: the elements comparison function
*
* This function implements "merge sort", which has O(nlog(n))
* complexity.
*
* The comparison function @cmp must return a negative value if @a
* should sort before @b, and a positive value if @a should sort after
* @b. If @a and @b are equivalent, and their original relative
* ordering is to be preserved, @cmp must return 0.
*/
void flist_sort(void *priv, struct flist_head *head,
		int (*cmp)(void *priv, struct flist_head *a,
			struct flist_head *b))
{
	/*
	 * Bottom-up mergesort: part[k] is either NULL or a sorted chain
	 * of exactly 2^k elements in the intermediate (singly linked,
	 * NULL-terminated) format. The last slot is a sentinel so the
	 * carry loop below always terminates.
	 */
	struct flist_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
						-- last slot is a sentinel */
	int lev;  /* index into part[] */
	int max_lev = 0;
	struct flist_head *list;

	if (flist_empty(head))
		return;

	memset(part, 0, sizeof(part));

	/* Detach the elements into a NULL-terminated singly linked chain */
	head->prev->next = NULL;
	list = head->next;

	while (list) {
		struct flist_head *cur = list;
		list = list->next;
		cur->next = NULL;

		/*
		 * Binary-counter carry: merge the 1-element chain upward
		 * through occupied slots until a free slot is found.
		 */
		for (lev = 0; part[lev]; lev++) {
			cur = merge(priv, cmp, part[lev], cur);
			part[lev] = NULL;
		}
		if (lev > max_lev) {
			if (lev >= MAX_LIST_LENGTH_BITS) {
				/*
				 * List longer than 2^MAX_LIST_LENGTH_BITS:
				 * degrade gracefully by merging into the
				 * top slot instead of overflowing part[].
				 */
				log_err("fio: list passed to"
					" list_sort() too long for"
					" efficiency\n");
				lev--;
			}
			max_lev = lev;
		}
		part[lev] = cur;
	}

	/* Fold all remaining partial lists together, smallest first */
	for (lev = 0; lev < max_lev; lev++)
		if (part[lev])
			list = merge(priv, cmp, part[lev], list);

	/* Final merge also rebuilds the ->prev links and the head node */
	merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
}
Loading

0 comments on commit 1ae83d4

Please sign in to comment.