Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-135033 / 25.04.1 / zed: Ensure spare activation after kernel-initiated device removal #289

Merged
merged 1 commit into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions cmd/zed/agents/zfs_retire.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
const char *devtype;
char *devname;
boolean_t skip_removal = B_FALSE;

if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
&devtype) == 0) {
Expand Down Expand Up @@ -440,18 +441,28 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c);

if (vs->vs_state == VDEV_STATE_OFFLINE)
return;

/*
* If state removed is requested for an already removed vdev,
* it's a loopback event from spa_async_remove(). Just
* ignore it.
*/
if ((vs->vs_state == VDEV_STATE_REMOVED && state ==
VDEV_STATE_REMOVED) || vs->vs_state == VDEV_STATE_OFFLINE)
return;
if ((vs->vs_state == VDEV_STATE_REMOVED &&
state == VDEV_STATE_REMOVED)) {
if (strcmp(class, "resource.fs.zfs.removed") == 0 &&
nvlist_exists(nvl, "by_kernel")) {
skip_removal = B_TRUE;
} else {
return;
}
}

/* Remove the vdev since device is unplugged */
int remove_status = 0;
if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
if (!skip_removal && (l2arc ||
(strcmp(class, "resource.fs.zfs.removed") == 0))) {
remove_status = zpool_vdev_remove_wanted(zhp, devname);
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
", err:%d", devname, libzfs_errno(zhdl));
Expand Down
3 changes: 2 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
#define SPA_ASYNC_REBUILD_DONE 0x2000
#define SPA_ASYNC_DETACH_SPARE 0x4000
#define SPA_ASYNC_REMOVE_BY_USER 0x8000

/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
Expand Down Expand Up @@ -1182,7 +1183,7 @@ extern void zfs_ereport_taskq_fini(void);
extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_approx_errlog_size(spa_t *spa);
Expand Down
19 changes: 12 additions & 7 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -8920,7 +8920,7 @@ spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
*/

static void
spa_async_remove(spa_t *spa, vdev_t *vd)
spa_async_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
{
if (vd->vdev_remove_wanted) {
vd->vdev_remove_wanted = B_FALSE;
Expand All @@ -8940,11 +8940,11 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
vdev_state_dirty(vd->vdev_top);

/* Tell userspace that the vdev is gone. */
zfs_post_remove(spa, vd);
zfs_post_remove(spa, vd, by_kernel);
}

for (int c = 0; c < vd->vdev_children; c++)
spa_async_remove(spa, vd->vdev_child[c]);
spa_async_remove(spa, vd->vdev_child[c], by_kernel);
}

static void
Expand Down Expand Up @@ -9038,13 +9038,18 @@ spa_async_thread(void *arg)
/*
* See if any devices need to be marked REMOVED.
*/
if (tasks & SPA_ASYNC_REMOVE) {
if (tasks & (SPA_ASYNC_REMOVE | SPA_ASYNC_REMOVE_BY_USER)) {
boolean_t by_kernel = B_TRUE;
if (tasks & SPA_ASYNC_REMOVE_BY_USER)
by_kernel = B_FALSE;
spa_vdev_state_enter(spa, SCL_NONE);
spa_async_remove(spa, spa->spa_root_vdev);
spa_async_remove(spa, spa->spa_root_vdev, by_kernel);
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i],
by_kernel);
for (int i = 0; i < spa->spa_spares.sav_count; i++)
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
spa_async_remove(spa, spa->spa_spares.sav_vdevs[i],
by_kernel);
(void) spa_vdev_state_exit(spa, NULL, 0);
}

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -4268,7 +4268,7 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));

vd->vdev_remove_wanted = B_TRUE;
spa_async_request(spa, SPA_ASYNC_REMOVE);
spa_async_request(spa, SPA_ASYNC_REMOVE_BY_USER);

return (spa_vdev_state_exit(spa, vd, 0));
}
Expand Down
18 changes: 16 additions & 2 deletions module/zfs/zfs_fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1432,9 +1432,23 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name,
* removal.
*/
void
zfs_post_remove(spa_t *spa, vdev_t *vd)
zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel)
{
zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL);
nvlist_t *aux = NULL;

if (by_kernel) {
/*
* Add the supplemental "by_kernel" flag to the event payload.
*/
aux = fm_nvlist_create(NULL);
if (aux)
fnvlist_add_boolean(aux, "by_kernel");
}

zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, aux);

if (by_kernel && aux)
fm_nvlist_destroy(aux, FM_NVA_FREE);
}

/*
Expand Down
Loading