From 0477f3dc81cbf8f73f01c801d139a0bec1a088e8 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 27 Jan 2025 16:49:13 -0800 Subject: [PATCH] Add kstats tracking gang allocations Gang blocks have a significant impact on the long and short term performance of a zpool, but there is not a lot of observability into whether they're being used. This change adds gang-specific kstats to ZFS, to better allow users to see whether ganging is happening. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Paul Dagnelie --- module/zfs/zio.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index ae5340da9f00..10930e7f381e 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -145,10 +145,53 @@ static const int zio_buf_debug_limit = 16384; static const int zio_buf_debug_limit = 0; #endif +typedef struct zio_stats { + kstat_named_t ziostat_total_allocations; + kstat_named_t ziostat_alloc_class_fallbacks; + kstat_named_t ziostat_gang_writes; + kstat_named_t ziostat_gang_multilevel; +} zio_stats_t; + +static zio_stats_t zio_stats = { + { "total_allocations", KSTAT_DATA_UINT64 }, + { "alloc_class_fallbacks", KSTAT_DATA_UINT64 }, + { "gang_writes", KSTAT_DATA_UINT64 }, + { "gang_multilevel", KSTAT_DATA_UINT64 }, +}; + +struct { + wmsum_t ziostat_total_allocations; + wmsum_t ziostat_alloc_class_fallbacks; + wmsum_t ziostat_gang_writes; + wmsum_t ziostat_gang_multilevel; +} ziostat_sums; + +#define ZIOSTAT_BUMP(stat) wmsum_add(&ziostat_sums.stat, 1); + +static kstat_t *zio_ksp; + static inline void __zio_execute(zio_t *zio); static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t); +static int +zio_kstats_update(kstat_t *ksp, int rw) +{ + zio_stats_t *zs = ksp->ks_data; + if (rw == KSTAT_WRITE) + return (EACCES); + + zs->ziostat_total_allocations.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_total_allocations); + zs->ziostat_alloc_class_fallbacks.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_alloc_class_fallbacks); + zs->ziostat_gang_writes.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_gang_writes); + zs->ziostat_gang_multilevel.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_gang_multilevel); + return (0); +} + void zio_init(void) { @@ -159,6 +202,19 @@ zio_init(void) zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + wmsum_init(&ziostat_sums.ziostat_total_allocations, 0); + wmsum_init(&ziostat_sums.ziostat_alloc_class_fallbacks, 0); + wmsum_init(&ziostat_sums.ziostat_gang_writes, 0); + wmsum_init(&ziostat_sums.ziostat_gang_multilevel, 0); + zio_ksp = kstat_create("zfs", 0, "zio_stats", + "misc", KSTAT_TYPE_NAMED, sizeof (zio_stats) / + sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); + if (zio_ksp != NULL) { + zio_ksp->ks_data = &zio_stats; + zio_ksp->ks_update = zio_kstats_update; + kstat_install(zio_ksp); + } + for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t align, cflags, data_cflags; @@ -286,6 +342,16 @@ zio_fini(void) VERIFY3P(zio_data_buf_cache[i], ==, NULL); } + if (zio_ksp != NULL) { + kstat_delete(zio_ksp); + zio_ksp = NULL; + } + + wmsum_fini(&ziostat_sums.ziostat_total_allocations); + wmsum_fini(&ziostat_sums.ziostat_alloc_class_fallbacks); + wmsum_fini(&ziostat_sums.ziostat_gang_writes); + wmsum_fini(&ziostat_sums.ziostat_gang_multilevel); + kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -4053,6 +4119,7 @@ zio_dva_allocate(zio_t *zio) mc = spa_preferred_class(spa, zio); zio->io_metaslab_class = mc; } + ZIOSTAT_BUMP(ziostat_total_allocations); /* * Try allocating the block in the usual metaslab class. @@ -4118,6 +4185,7 @@ zio_dva_allocate(zio_t *zio) error); } + ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); error = metaslab_alloc(spa, mc, zio->io_size, bp, zio->io_prop.zp_copies, zio->io_txg, NULL, flags, &zio->io_alloc_list, zio, zio->io_allocator); @@ -4130,6 +4198,9 @@ zio_dva_allocate(zio_t *zio) spa_name(spa), zio, (u_longlong_t)zio->io_size, error); } + ZIOSTAT_BUMP(ziostat_gang_writes); + if (flags & METASLAB_GANG_CHILD) + ZIOSTAT_BUMP(ziostat_gang_multilevel); return (zio_write_gang_block(zio, mc)); } if (error != 0) { @@ -4221,6 +4292,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, int flags = METASLAB_ZIL; int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; + ZIOSTAT_BUMP(ziostat_total_allocations); error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); *slog = (error == 0); @@ -4230,6 +4302,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, &io_alloc_list, NULL, allocator); } if (error != 0) { + ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator);