Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/zed/agents/zfs_diagnosis.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
*/
if (isresource) {
zfs_stats.resource_drops.fmds_value.ui64++;
fmd_hdl_debug(hdl, "discarding '%s for vdev %llu",
fmd_hdl_debug(hdl, "discarding '%s' for vdev %llu",
class, vdev_guid);
return;
}
Expand Down
139 changes: 126 additions & 13 deletions cmd/zed/agents/zfs_retire.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,16 @@ find_pool(zpool_handle_t *zhp, void *data)
* Find a vdev within a tree with a matching GUID.
*/
static nvlist_t *
find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid,
uint64_t *parent_guid)
{
uint64_t guid;
uint64_t guid, saved_parent_guid;
nvlist_t **child;
uint_t c, children;
nvlist_t *ret;
nvlist_t *ret = NULL;

if (parent_guid != NULL)
saved_parent_guid = *parent_guid;

if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == search_guid) {
Expand All @@ -119,29 +123,38 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
return (NULL);

for (c = 0; c < children; c++) {
if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
return (ret);
if ((ret = find_vdev(zhdl, child[c], search_guid,
parent_guid)) != NULL)
goto out;
}

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) != 0)
return (NULL);

for (c = 0; c < children; c++) {
if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
return (ret);
if ((ret = find_vdev(zhdl, child[c], search_guid,
parent_guid)) != NULL)
goto out;
}

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) != 0)
return (NULL);

for (c = 0; c < children; c++) {
if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
return (ret);
if ((ret = find_vdev(zhdl, child[c], search_guid,
parent_guid)) != NULL)
goto out;
}

return (NULL);
out:
/* If parent_guid was set, don't reset it. */
if (ret != NULL && parent_guid != NULL &&
saved_parent_guid == *parent_guid)
*parent_guid = guid;
return (ret);
}

static int
Expand Down Expand Up @@ -203,11 +216,12 @@ find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid)
}

/*
* Given a (pool, vdev) GUID pair, find the matching pool and vdev.
* Given a (pool, vdev) GUID pair, find the matching pool, vdev and
* its top_guid.
*/
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
nvlist_t **vdevp)
find_by_guid_impl(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
nvlist_t **vdevp, uint64_t *top_guid)
{
find_cbdata_t cb;
zpool_handle_t *zhp;
Expand All @@ -229,7 +243,8 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
}

if (vdev_guid != 0) {
if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid)) == NULL) {
if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid,
top_guid)) == NULL) {
zpool_close(zhp);
return (NULL);
}
Expand All @@ -238,6 +253,96 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
return (zhp);
}

/*
 * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
 *
 * Convenience wrapper around find_by_guid_impl() for callers that do
 * not need the enclosing top-level vdev's GUID (the NULL argument
 * suppresses the top_guid out-parameter).  On success the matching
 * vdev nvlist is stored in *vdevp and the pool handle is returned;
 * NULL is returned when no match is found.
 */
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
nvlist_t **vdevp)
{
return (find_by_guid_impl(zhdl, pool_guid, vdev_guid, vdevp, NULL));
}

/*
* Given a (pool, vdev) GUID pair, count the number of faulted vdevs in
* its top vdev and return TRUE if the number of failures at i-th device
* index in each dRAID failure group, equals to the number of failure groups,
* which means it's the domain failure, and the vdev is one of those faults.
* Otherwise, return FALSE.
*/
/*
 * Given a (pool, vdev) GUID pair, count the number of faulted vdevs in
 * its top-level vdev and return B_TRUE if the number of failures at the
 * i-th device index in each dRAID failure group equals the number of
 * failure groups -- i.e. a whole failure domain (for example an
 * enclosure) has failed and this vdev is one of those faults.
 * Otherwise return B_FALSE.
 */
static boolean_t
is_draid_fdomain_failure(libzfs_handle_t *zhdl, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	uint64_t guid, top_guid;
	uint64_t children;
	nvlist_t *nvtop, *vdev, **child;
	vdev_stat_t *vs;
	zpool_handle_t *zhp;
	uint_t i, c, vdev_i = UINT_MAX, width, *nfaults_map;
	boolean_t res = B_FALSE;

	/*
	 * The first lookup is only needed to learn the top-level vdev
	 * GUID; close the returned pool handle immediately so it is not
	 * leaked (find_by_guid_impl() opens the pool on success).
	 */
	if ((zhp = find_by_guid_impl(zhdl, pool_guid, vdev_guid, &vdev,
	    &top_guid)) == NULL)
		return (B_FALSE);
	zpool_close(zhp);

	/*
	 * Look up the top-level vdev's config.  nvtop and the child
	 * array presumably point into this handle's cached config, so
	 * keep the handle open until we are done with them -- then
	 * close it on every exit path below (no leak).
	 */
	if ((zhp = find_by_guid_impl(zhdl, pool_guid, top_guid, &nvtop,
	    NULL)) == NULL)
		return (B_FALSE);

	if (nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN,
	    &child, &width) != 0)
		goto out;

	if (nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_DRAID_NCHILDREN,
	    &children) != 0) /* not dRAID */
		goto out;

	/* Guard the modulo/division below; equal means no failure domains. */
	if (children == 0 || width == children)
		goto out;

	/*
	 * No rush with starting resilver: it can be a domain failure,
	 * in which case we need to wait a little to allow more devices
	 * to reach the faulted state so that we can detect that it is
	 * indeed a domain failure.
	 */
	sleep(5);

	/*
	 * nfaults_map[i] counts faulted vdevs occupying index i within
	 * their failure group.
	 * NOTE(review): consider the fmd_hdl_alloc()/fmd_hdl_free()
	 * wrappers with FMD_SLEEP, which cannot fail.
	 */
	nfaults_map = calloc(children, sizeof (*nfaults_map));
	if (nfaults_map == NULL) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	for (c = 0; c < width; c++) {
		/* Skip children without stats rather than dereferencing junk. */
		if (nvlist_lookup_uint64_array(child[c],
		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &i) != 0)
			continue;

		/*
		 * NOTE(review): FAULTED alone may be too strict -- a
		 * device already replaced by a distributed hot spare is
		 * no longer FAULTED; vs_state != VDEV_STATE_HEALTHY may
		 * be the intended check.  TODO confirm before changing.
		 */
		if (vs->vs_state != VDEV_STATE_FAULTED)
			continue;

		nfaults_map[c % children]++;

		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
		    &guid) == 0 && guid == vdev_guid)
			vdev_i = (c % children);
	}

	/*
	 * A domain failure shows up as (width / children) faults -- one
	 * per failure group -- all at the same in-group index as this
	 * vdev.  vdev_i == UINT_MAX means this vdev was not among the
	 * faulted children.
	 */
	if (vdev_i != UINT_MAX && nfaults_map[vdev_i] == (width / children))
		res = B_TRUE;

	free(nfaults_map);

	if (res)
		fmd_hdl_debug(fmd_module_hdl("zfs-retire"),
		    "vdev %llu belongs to draid fdomain failure", vdev_guid);
out:
	zpool_close(zhp);
	return (res);
}

/*
* Given a vdev, attempt to replace it with every known spare until one
* succeeds or we run out of devices to try.
Expand Down Expand Up @@ -445,6 +550,14 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
if (vs->vs_state == VDEV_STATE_OFFLINE)
return;

/*
* Resilvering domain failures can take a lot of computing and
* I/O bandwidth resources, only to be wasted when the failed
* domain component (for example enclosure) is replaced.
*/
if (is_draid_fdomain_failure(zhdl, pool_guid, vdev_guid))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to investigate further but when locally testing with a draid2:6d:10c:90w:9s pool and faulting an entire failure domain with zpool offline -f pool vdev0 vdev1 vdev2... hot spares were still kicked in by the updated zed.

return;

/*
* If state removed is requested for already removed vdev,
* its a loopback event from spa_async_remove(). Just
Expand Down
21 changes: 19 additions & 2 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3528,6 +3528,11 @@ show_import(nvlist_t *config, boolean_t report_error)
"accessed by another system.\n"));
break;

case ZPOOL_STATUS_FAULTED_FDOM_R:
(void) printf_color(ANSI_YELLOW, gettext("One or more failure "
"domains are faulted.\n"));
break;

case ZPOOL_STATUS_FAULTED_DEV_R:
case ZPOOL_STATUS_FAULTED_DEV_NR:
(void) printf_color(ANSI_YELLOW, gettext("One or more devices "
Expand Down Expand Up @@ -8039,7 +8044,7 @@ zpool_do_online(int argc, char **argv)

if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
(void) fprintf(stderr, gettext("failed to open pool "
"\"%s\""), poolname);
"\"%s\"\n"), poolname);
return (1);
}

Expand Down Expand Up @@ -8183,7 +8188,7 @@ zpool_do_offline(int argc, char **argv)

if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
(void) fprintf(stderr, gettext("failed to open pool "
"\"%s\""), poolname);
"\"%s\"\n"), poolname);
return (1);
}

Expand Down Expand Up @@ -10725,6 +10730,18 @@ print_status_reason(zpool_handle_t *zhp, status_cbdata_t *cbp,
"or use 'zpool clear' to mark the device\n\trepaired.\n"));
break;

case ZPOOL_STATUS_FAULTED_FDOM_R:
(void) snprintf(status, ST_SIZE,
gettext("One or more failure domains are faulted. "
"The storage devices may be\n\tintact. Sufficient "
"replicas exist for the pool to continue functioning\n\t"
"in a degraded state.\n"));
(void) snprintf(action, AC_SIZE,
gettext("Replace the faulted domain device, "
"or use 'zpool clear' to mark domain\n\tstorage devices "
"repaired.\n"));
break;

case ZPOOL_STATUS_FAULTED_DEV_NR:
(void) snprintf(status, ST_SIZE,
gettext("One or more devices are "
Expand Down
Loading
Loading