Skip to content

Commit 2532291

Browse files
Merge pull request #17757 from daos-stack/liang/b_aurora_p5
This PR includes three commits, which make the following changes: - Add a delay before the first rebuild task, so that it is not scheduled before SWIM becomes stable. - Add more debug messages for reclaim failures. - Only require 2 credits for GC when it is under space pressure, so that it can run with minimum space overhead. - Skip a limited number of EBUSY objects during fail reclaim instead of blocking rebuild forever; these objects will be reclaimed by other rebuilds. Signed-off-by: Xuezhao Liu <xuezhao.liu@hpe.com>
2 parents e8b1443 + 3d9bacc commit 2532291

12 files changed

Lines changed: 193 additions & 46 deletions

File tree

src/container/srv_container.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -1707,6 +1707,7 @@ cont_ec_agg_alloc(struct cont_svc *cont_svc, uuid_t cont_uuid,
17071707
for (i = 0; i < rank_nr; i++) {
17081708
ec_agg->ea_server_ephs[i].rank = doms[i].do_comp.co_rank;
17091709
ec_agg->ea_server_ephs[i].eph = 0;
1710+
ec_agg->ea_server_ephs[i].ee_update_ts = daos_gettime_coarse();
17101711
}
17111712
d_list_add(&ec_agg->ea_list, &cont_svc->cs_ec_agg_list);
17121713
*ec_aggp = ec_agg;
@@ -1764,8 +1765,10 @@ ds_cont_leader_update_agg_eph(uuid_t pool_uuid, uuid_t cont_uuid,
17641765

17651766
for (i = 0; i < ec_agg->ea_servers_num; i++) {
17661767
if (ec_agg->ea_server_ephs[i].rank == rank) {
1767-
if (ec_agg->ea_server_ephs[i].eph < eph)
1768+
if (ec_agg->ea_server_ephs[i].eph < eph) {
17681769
ec_agg->ea_server_ephs[i].eph = eph;
1770+
ec_agg->ea_server_ephs[i].ee_update_ts = daos_gettime_coarse();
1771+
}
17691772
break;
17701773
}
17711774
}
@@ -1989,6 +1992,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc)
19891992
daos_epoch_t cur_eph, new_eph;
19901993
daos_epoch_t min_eph;
19911994
d_rank_t rank;
1995+
uint64_t cur_ts;
19921996
int i;
19931997
int rc;
19941998

@@ -2015,6 +2019,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc)
20152019
}
20162020

20172021
min_eph = DAOS_EPOCH_MAX;
2022+
cur_ts = daos_gettime_coarse();
20182023
for (i = 0; i < ec_agg->ea_servers_num; i++) {
20192024
rank = ec_agg->ea_server_ephs[i].rank;
20202025

@@ -2024,6 +2029,13 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc)
20242029
continue;
20252030
}
20262031

2032+
if (pool->sp_reclaim != DAOS_RECLAIM_DISABLED &&
2033+
cur_ts > ec_agg->ea_server_ephs[i].ee_update_ts + 600)
2034+
D_WARN(DF_CONT ": Sluggish EC boundary report from rank %d, " DF_U64
2035+
" Seconds.",
2036+
DP_CONT(svc->cs_pool_uuid, ec_agg->ea_cont_uuid), rank,
2037+
cur_ts - ec_agg->ea_server_ephs[i].ee_update_ts);
2038+
20272039
if (ec_agg->ea_server_ephs[i].eph < min_eph)
20282040
min_eph = ec_agg->ea_server_ephs[i].eph;
20292041
}

src/container/srv_internal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -61,6 +61,7 @@ extern bool ec_agg_disabled;
6161
struct ec_eph {
6262
d_rank_t rank;
6363
daos_epoch_t eph;
64+
uint64_t ee_update_ts; /* update timestamp */
6465
};
6566

6667
/* container EC aggregation epoch control descriptor, which is only on leader */

src/object/cli_obj.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -4122,8 +4122,10 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor)
41224122
obj_auxi_shards_iterate(obj_auxi, update_sub_anchor_cb, NULL);
41234123

41244124
sub_anchors = (struct shard_anchors *)anchor->da_sub_anchors;
4125-
if (!d_list_empty(&sub_anchors->sa_merged_list))
4125+
if (!d_list_empty(&sub_anchors->sa_merged_list)) {
4126+
D_ASSERT(obj_auxi->opc != DAOS_OBJ_RPC_ENUMERATE);
41264127
return;
4128+
}
41274129

41284130
if (sub_anchors_is_eof(sub_anchors)) {
41294131
daos_obj_list_t *obj_args;
@@ -4132,6 +4134,18 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor)
41324134

41334135
obj_args = dc_task_get_args(obj_auxi->obj_task);
41344136
sub_anchors_free(obj_args, obj_auxi->opc);
4137+
} else if (obj_auxi->opc == DAOS_OBJ_RPC_ENUMERATE && D_LOG_ENABLED(DB_REBUILD)) {
4138+
for (int i = 0; i < sub_anchors->sa_anchors_nr; i++) {
4139+
daos_anchor_t *sub_anchor;
4140+
4141+
sub_anchor = &sub_anchors->sa_anchors[i].ssa_anchor;
4142+
if (!daos_anchor_is_eof(sub_anchor)) {
4143+
D_DEBUG(DB_REBUILD, "shard %d sub_anchor %d/%d non EOF",
4144+
sub_anchors->sa_anchors[i].ssa_shard, i,
4145+
sub_anchors->sa_anchors_nr);
4146+
break;
4147+
}
4148+
}
41354149
}
41364150
}
41374151

@@ -6445,7 +6459,7 @@ shard_anchors_check_alloc_bufs(struct obj_auxi_args *obj_auxi, struct shard_anch
64456459
}
64466460

64476461
if (obj_args->recxs != NULL) {
6448-
if (sub_anchor->ssa_recxs != NULL && sub_anchors->sa_nr == nr)
6462+
if (sub_anchor->ssa_recxs != NULL && sub_anchors->sa_nr != nr)
64496463
D_FREE(sub_anchor->ssa_recxs);
64506464

64516465
if (sub_anchor->ssa_recxs == NULL) {

src/object/obj_enum.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* (C) Copyright 2018-2022 Intel Corporation.
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -689,9 +690,8 @@ obj_enum_iterate(daos_key_desc_t *kdss, d_sg_list_t *sgl, int nr,
689690
ptr = sgl_indexed_byte(sgl, &sgl_idx);
690691
D_ASSERTF(ptr != NULL, "kds and sgl don't line up");
691692

692-
D_DEBUG(DB_REBUILD, "process %d, type %d, ptr %p, len "DF_U64
693-
", total %zd\n", i, kds->kd_val_type, ptr,
694-
kds->kd_key_len, sgl->sg_iovs[0].iov_len);
693+
D_DEBUG(DB_REBUILD, "process %d/%d, type %d, ptr %p, len " DF_U64 ", total %zd\n",
694+
i, nr, kds->kd_val_type, ptr, kds->kd_key_len, sgl->sg_iovs[0].iov_len);
695695
if (kds->kd_val_type == 0 ||
696696
(kds->kd_val_type != type && type != -1)) {
697697
sgl_move_forward(sgl, &sgl_idx, kds->kd_key_len);

src/object/srv_obj.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
* (C) Copyright 2025 Google LLC
55
*
66
* SPDX-License-Identifier: BSD-2-Clause-Patent
@@ -2423,7 +2423,7 @@ obj_inflight_io_check(struct ds_cont_child *child, uint32_t opc,
24232423
uint32_t rpc_map_ver, uint32_t flags)
24242424
{
24252425
if (opc == DAOS_OBJ_RPC_ENUMERATE && flags & ORF_FOR_MIGRATION) {
2426-
/* EC aggregation is still inflight, rebuild should wait until it's paused */
2426+
/* EC aggregation is still in-flight, rebuild should wait until it's paused */
24272427
if (ds_cont_child_ec_aggregating(child)) {
24282428
D_ERROR(DF_CONT" ec aggregate still active, rebuilding %d\n",
24292429
DP_CONT(child->sc_pool->spc_uuid, child->sc_uuid),
@@ -3258,6 +3258,27 @@ obj_enum_complete(crt_rpc_t *rpc, int status, int map_version,
32583258
D_FREE(oeo->oeo_csum_iov.iov_buf);
32593259
}
32603260

3261+
static void
3262+
dump_enum_anchor(daos_unit_oid_t uoid, daos_anchor_t *anchor, char *str)
3263+
{
3264+
int nr = DAOS_ANCHOR_BUF_MAX / 8;
3265+
int i;
3266+
uint64_t data[nr];
3267+
3268+
D_DEBUG(DB_REBUILD, DF_UOID "%s anchor -", DP_UOID(uoid), str);
3269+
D_DEBUG(DB_REBUILD, "type %d, shard %d, flags 0x%x\n", anchor->da_type, anchor->da_shard,
3270+
anchor->da_flags);
3271+
for (i = 0; i < nr; i++)
3272+
data[i] = *(uint64_t *)((char *)anchor->da_buf + i * 8);
3273+
if (nr >= 13)
3274+
D_DEBUG(DB_REBUILD,
3275+
"da_buf " DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64
3276+
"," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64
3277+
"," DF_X64,
3278+
data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
3279+
data[8], data[9], data[10], data[11], data[12]);
3280+
}
3281+
32613282
static int
32623283
obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc,
32633284
struct vos_iter_anchors *anchors, struct ds_obj_enum_arg *enum_arg,
@@ -3326,6 +3347,10 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc,
33263347
D_ASSERT(opc == DAOS_OBJ_RPC_ENUMERATE);
33273348
type = VOS_ITER_DKEY;
33283349
param.ip_flags |= VOS_IT_RECX_VISIBLE;
3350+
if (D_LOG_ENABLED(DB_REBUILD)) {
3351+
dump_enum_anchor(oei->oei_oid, &anchors->ia_dkey, "dkey");
3352+
dump_enum_anchor(oei->oei_oid, &anchors->ia_akey, "akey");
3353+
}
33293354
if (daos_anchor_get_flags(&anchors->ia_dkey) &
33303355
DIOF_WITH_SPEC_EPOCH) {
33313356
/* For obj verification case. */
@@ -3343,7 +3368,12 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc,
33433368
enum_arg->chk_key2big = 1;
33443369
enum_arg->need_punch = 1;
33453370
enum_arg->copy_data_cb = vos_iter_copy;
3346-
fill_oid(oei->oei_oid, enum_arg);
3371+
rc = fill_oid(oei->oei_oid, enum_arg);
3372+
if (rc != 0) {
3373+
rc = -DER_KEY2BIG;
3374+
DL_ERROR(rc, DF_UOID "fill oid failed", DP_UOID(oei->oei_oid));
3375+
goto failed;
3376+
}
33473377
}
33483378

33493379
/*

src/object/srv_obj_migrate.c

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2019-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -2651,9 +2651,15 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data)
26512651
if (rc == 1 &&
26522652
(is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) ||
26532653
(io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) {
2654-
D_DEBUG(DB_REBUILD, DF_UOID" ignore shard "DF_KEY"/%u/%d/%u/%d.\n",
2655-
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard,
2656-
(int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc);
2654+
if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8)
2655+
D_DEBUG(DB_REBUILD,
2656+
DF_UOID " ignore shard, int dkey " DF_U64 "/%u/%d/%u/%d.",
2657+
DP_UOID(io->ui_oid), *(uint64_t *)io->ui_dkey.iov_buf, shard,
2658+
(int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc);
2659+
else
2660+
D_DEBUG(DB_REBUILD, DF_UOID " ignore shard " DF_KEY "/%u/%d/%u/%d.",
2661+
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard,
2662+
(int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc);
26572663
D_GOTO(put, rc = 0);
26582664
}
26592665
rc = 0;
@@ -2669,9 +2675,17 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data)
26692675
continue;
26702676
}
26712677

2672-
D_DEBUG(DB_REBUILD, DF_UOID" unpack "DF_KEY" for shard %u/%u/%u/"DF_X64"/%u\n",
2673-
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, unpack_tgt_off,
2674-
migrate_tgt_off, io->ui_dkey_hash, parity_shard);
2678+
if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8)
2679+
D_DEBUG(DB_REBUILD,
2680+
DF_UOID " unpack int dkey " DF_U64 " for shard %u/%u/%u/" DF_X64
2681+
"/%u",
2682+
DP_UOID(io->ui_oid), *(uint64_t *)io->ui_dkey.iov_buf, shard,
2683+
unpack_tgt_off, migrate_tgt_off, io->ui_dkey_hash, parity_shard);
2684+
else
2685+
D_DEBUG(DB_REBUILD,
2686+
DF_UOID " unpack " DF_KEY " for shard %u/%u/%u/" DF_X64 "/%u",
2687+
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, unpack_tgt_off,
2688+
migrate_tgt_off, io->ui_dkey_hash, parity_shard);
26752689

26762690
/**
26772691
* Since we do not need split the rebuild into parity rebuild
@@ -2708,8 +2722,13 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data)
27082722
if (!create_migrate_one) {
27092723
struct ds_cont_child *cont = NULL;
27102724

2711-
D_DEBUG(DB_REBUILD, DF_UOID"/"DF_KEY" does not need rebuild.\n",
2712-
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey));
2725+
if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8)
2726+
D_DEBUG(DB_REBUILD,
2727+
DF_UOID "/int dkey: " DF_U64 " does not need rebuild.\n",
2728+
DP_UOID(io->ui_oid), *(uint64_t *)io->ui_dkey.iov_buf);
2729+
else
2730+
D_DEBUG(DB_REBUILD, DF_UOID "/" DF_KEY " does not need rebuild.\n",
2731+
DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey));
27132732

27142733
/* Create the vos container when no record need to be rebuilt for this shard,
27152734
* for the case of reintegrate the container was discarded ahead.

src/pool/srv_pool.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -2257,6 +2257,11 @@ static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map,
22572257
uint32_t map_version_for, bool sync_remove);
22582258
static void pool_svc_rfcheck_ult(void *arg);
22592259

2260+
#define RESTART_RB_DELAY_ENV "D_RESTART_RB_DELAY"
2261+
#define RESTART_RB_DELAY_MIN (60)
2262+
#define RESTART_RB_DELAY_DEF (100)
2263+
#define RESTART_RB_DELAY_MAX (600)
2264+
22602265
static int
22612266
pool_svc_step_up_cb(struct ds_rsvc *rsvc)
22622267
{
@@ -2268,6 +2273,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
22682273
daos_prop_t *prop = NULL;
22692274
bool cont_svc_up = false;
22702275
bool events_initialized = false;
2276+
uint64_t age_sec, delay;
2277+
uint32_t restart_rb_delay = RESTART_RB_DELAY_DEF;
22712278
d_rank_t rank = dss_self_rank();
22722279
int rc;
22732280

@@ -2367,7 +2374,18 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
23672374
if (rc != 0)
23682375
goto out;
23692376

2370-
rc = ds_rebuild_regenerate_task(svc->ps_pool, prop, 0);
2377+
rc = d_getenv_uint(RESTART_RB_DELAY_ENV, &restart_rb_delay);
2378+
if (rc == 0) {
2379+
if (restart_rb_delay < RESTART_RB_DELAY_MIN)
2380+
restart_rb_delay = RESTART_RB_DELAY_MIN;
2381+
if (restart_rb_delay > RESTART_RB_DELAY_MAX)
2382+
restart_rb_delay = RESTART_RB_DELAY_MAX;
2383+
}
2384+
age_sec = d_hlc_age2sec(dss_get_start_epoch());
2385+
delay = age_sec < restart_rb_delay ? (restart_rb_delay - age_sec) : 0;
2386+
D_INFO("delay %d, " DF_U64 " seconds for ds_rebuild_regenerate_task\n", restart_rb_delay,
2387+
delay);
2388+
rc = ds_rebuild_regenerate_task(svc->ps_pool, prop, delay);
23712389
if (rc != 0)
23722390
goto out;
23732391

src/rebuild/rebuild_internal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2017-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -247,6 +247,7 @@ struct rebuild_pool_tls {
247247
d_list_t rebuild_pool_list;
248248
uint64_t rebuild_pool_obj_count;
249249
uint64_t rebuild_pool_reclaim_obj_count;
250+
uint64_t rebuild_pool_reclaim_skipped;
250251
unsigned int rebuild_pool_ver;
251252
uint32_t rebuild_pool_gen;
252253
uint64_t rebuild_pool_leader_term;

0 commit comments

Comments
 (0)