Skip to content

Commit a442652

Browse files
committed
Merge remote-tracking branch 'origin/release/2.6' into grom72/SRE-3703-2.6
Signed-off-by: Tomasz Gromadzki <tomasz.gromadzki@hpe.com>
2 parents f402f18 + c3c4f25 commit a442652

5 files changed

Lines changed: 87 additions & 34 deletions

File tree

src/object/srv_obj_migrate.c

Lines changed: 43 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -271,6 +271,17 @@ struct iter_obj_arg {
271271
uint32_t generation;
272272
};
273273

274+
/**
 * Account for one more migration retry, warn about it with exponential
 * back-off, then pause before the caller retries.
 *
 * \param[in]     tls       migrate pool TLS, only used to format the log prefix.
 * \param[in]     oid       unit object ID being retried, only used for logging.
 * \param[in]     rc        error code that triggered the retry; passed to DL_WARN.
 * \param[in,out] tried     retry counter. Must be a modifiable integer lvalue
 *                          without side effects: it is evaluated several times
 *                          and is incremented by this macro.
 * \param[in]     duration  time already spent retrying; printed with DF_U64 and
 *                          labelled "seconds" in the message.
 *
 * The warning is emitted only when the counter is a power of two (1, 2, 4, ...),
 * so a long retry storm does not flood the log. Once the counter reaches 4096 it
 * is reset to 2048; because 2048 is itself a power of two, a warning is still
 * logged once per 2048 retries after that point and the counter stays bounded.
 *
 * Every use of a parameter is parenthesized so that expressions such as `*ptr`
 * or `arg->tried` expand correctly.
 */
#define MIGR_RETRY_WAIT_WARN(tls, oid, rc, tried, duration) \
	do { \
		(tried)++; \
		if ((tried) >= 4096) { \
			(tried) = 2048; \
		} \
		if (((tried) & ((tried) - 1)) == 0) { \
			DL_WARN(rc, DF_RB ": retry " DF_UOID ", tried[%d] " DF_U64 " seconds ", \
				DP_RB_MPT(tls), DP_UOID(oid), (tried), (duration)); \
		} \
		/* NOTE(review): 1000 is presumably milliseconds - confirm against dss_sleep() */ \
		dss_sleep(1000); \
	} while (0)
284+
274285
static int
275286
migrate_try_obj_insert(struct migrate_pool_tls *tls, uuid_t co_uuid, daos_unit_oid_t oid,
276287
daos_epoch_t epoch, daos_epoch_t punched_epoch, unsigned int shard,
@@ -778,6 +789,7 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_
778789
uint64_t now;
779790
int rc;
780791
int wait = MEM_NO_WAIT;
792+
int tried = 0;
781793

782794
/* pass rebuild epoch by extra_arg */
783795
if (flags & DIOF_FETCH_EPOCH_EC_AGG_BOUNDARY) {
@@ -789,7 +801,8 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_
789801
retry:
790802
rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, extra_arg,
791803
csum_iov_fetch);
792-
if ((rc == -DER_TIMEDOUT || rc == -DER_FETCH_AGAIN || rc == -DER_NOMEM) &&
804+
if ((rc == -DER_TIMEDOUT || rc == -DER_FETCH_AGAIN || rc == -DER_NOMEM ||
805+
daos_crt_network_error(rc)) &&
793806
tls->mpt_version + 1 >= tls->mpt_pool->spc_map_version) {
794807
if (tls->mpt_fini) {
795808
DL_ERROR(rc, DF_RB ": dsc_obj_fetch " DF_UOID "failed when mpt_fini",
@@ -799,19 +812,25 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_
799812
/* If pool map does not change, then let's retry for timeout, instead of
800813
* fail out.
801814
*/
815+
now = daos_gettime_coarse();
816+
if (then == 0)
817+
then = now;
818+
802819
if (rc != -DER_NOMEM) {
803-
DL_WARN(rc, DF_RB ": retry " DF_UOID, DP_RB_MPT(tls),
804-
DP_UOID(mrone->mo_oid));
805-
dss_sleep(1000);
806-
D_GOTO(retry, rc);
820+
if (rc == -DER_TIMEDOUT || rc == -DER_FETCH_AGAIN || now - then < 600) {
821+
MIGR_RETRY_WAIT_WARN(tls, mrone->mo_oid, rc, tried, now - then);
822+
D_GOTO(retry, rc);
823+
}
824+
/* waited for too long, return error and restart rebuild */
825+
DL_ERROR(rc, DF_RB " waited for over 10 minutes due to network error",
826+
DP_RB_MRO(mrone));
827+
return rc;
807828
}
808829

809-
now = daos_gettime_coarse();
810830
if (wait == MEM_NO_WAIT) {
811831
wait = MEM_WAIT;
812832
res->res_data.mem_waiting++;
813833
res->res_data.mem_err++;
814-
then = now;
815834
}
816835
/* sleep a few seconds before retry, give other layers a chance to
817836
* release resources.
@@ -3081,7 +3100,9 @@ migrate_obj_epoch(struct migrate_pool_tls *tls, struct iter_obj_arg *arg, daos_e
30813100
uint32_t minimum_nr;
30823101
uint32_t enum_flags;
30833102
uint32_t num;
3084-
int waited = 0;
3103+
uint64_t now;
3104+
uint64_t then = 0;
3105+
int tried = 0;
30853106
int rc = 0;
30863107

30873108
D_DEBUG(DB_REBUILD, "migrate obj "DF_UOID" for shard %u eph "
@@ -3216,23 +3237,29 @@ migrate_obj_epoch(struct migrate_pool_tls *tls, struct iter_obj_arg *arg, daos_e
32163237
/* -DER_UPDATE_AGAIN means the remote target does not parse EC
32173238
* aggregation yet, so let's retry.
32183239
*/
3219-
waited++;
3240+
now = daos_gettime_coarse();
3241+
if (then == 0)
3242+
then = now;
32203243
dss_sleep(5000);
32213244
D_DEBUG(DB_REBUILD, DF_UOID "retry %d secs with %d \n", DP_UOID(arg->oid),
3222-
waited * 5, rc);
3245+
(int)(now + 5 - then), rc);
32233246
rc = 0;
32243247
continue;
32253248
} else if (rc) {
32263249
/* To avoid reclaim and retry rebuild, let's retry until the pool map
32273250
* being changed due to further failure.
32283251
*/
3229-
if (rc == -DER_TIMEDOUT &&
3252+
if ((rc == -DER_TIMEDOUT || daos_crt_network_error(rc)) &&
32303253
tls->mpt_version + 1 >= tls->mpt_pool->spc_map_version) {
3231-
D_WARN(DF_UUID" retry "DF_UOID" "DF_RC"\n",
3232-
DP_UUID(tls->mpt_pool_uuid), DP_UOID(arg->oid),
3233-
DP_RC(rc));
3234-
rc = 0;
3235-
continue;
3254+
now = daos_gettime_coarse();
3255+
if (then == 0)
3256+
then = now;
3257+
if (rc == -DER_TIMEDOUT || now - then < 600) {
3258+
MIGR_RETRY_WAIT_WARN(tls, arg->oid, rc, tried, now - then);
3259+
rc = 0;
3260+
continue;
3261+
}
3262+
/* fall through and fail rebuild */
32363263
}
32373264

32383265
/* container might have been destroyed. Or there is

src/rebuild/rebuild_iv.c

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2017-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -188,8 +188,8 @@ rebuild_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key,
188188

189189
if (ref_rc != 0) {
190190
rc = ref_rc;
191-
DL_WARN(rc, DF_UUID "bypass refresh, IV class id %d.",
192-
DP_UUID(entry->ns->iv_pool_uuid), key->class_id);
191+
DL_WARN(rc, DF_RB ", IV ns pool " DF_UUID "bypass refresh, IV class id %d.",
192+
DP_RB_RPT(rpt), DP_UUID(entry->ns->iv_pool_uuid), key->class_id);
193193
goto out;
194194
}
195195

@@ -214,11 +214,16 @@ rebuild_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key,
214214
D_WARN("leader change stable epoch from "DF_U64" to "
215215
DF_U64 "\n", rpt->rt_stable_epoch,
216216
dst_iv->riv_stable_epoch);
217-
rpt->rt_global_done = dst_iv->riv_global_done;
218-
rpt->rt_global_scan_done = dst_iv->riv_global_scan_done;
217+
218+
/* NB: IV refresh can arrive out of order, but rebuild can't revert global done */
219+
if (!rpt->rt_global_done)
220+
rpt->rt_global_done = dst_iv->riv_global_done;
221+
if (!rpt->rt_global_scan_done)
222+
rpt->rt_global_scan_done = dst_iv->riv_global_scan_done;
219223
old_ver = rpt->rt_global_dtx_resync_version;
220-
if (rpt->rt_global_dtx_resync_version < dst_iv->riv_global_dtx_resyc_version)
224+
if (old_ver < dst_iv->riv_global_dtx_resyc_version)
221225
rpt->rt_global_dtx_resync_version = dst_iv->riv_global_dtx_resyc_version;
226+
222227
if (old_ver < rpt->rt_rebuild_ver &&
223228
dst_iv->riv_global_dtx_resyc_version >= rpt->rt_rebuild_ver) {
224229
D_INFO(DF_UUID " global/iv/rebuild_ver %u/%u/%u signal wait cond\n",

src/rebuild/scan.c

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -131,14 +131,17 @@ rebuild_obj_send_cb(struct tree_cache_root *root, struct rebuild_send_arg *arg)
131131

132132
if (rpt->rt_abort || rpt->rt_finishing || rpt->rt_global_done) {
133133
rc = -DER_SHUTDOWN;
134-
DL_INFO(rc, DF_RB ": give up ds_object_migrate_send, shutdown rebuild",
135-
DP_RB_RPT(rpt));
134+
DL_INFO(rc,
135+
DF_RB ": rt_abort %d, rt_finishing %d, rt_global_done %d, "
136+
"give up ds_object_migrate_send, shutdown rebuild",
137+
DP_RB_RPT(rpt), rpt->rt_abort, rpt->rt_finishing,
138+
rpt->rt_global_done);
136139
break;
137140
}
138141

139142
/* otherwise let's retry */
140-
D_DEBUG(DB_REBUILD, DF_UUID" retry send object to tgt_id %d\n",
141-
DP_UUID(rpt->rt_pool_uuid), arg->tgt_id);
143+
D_DEBUG(DB_REBUILD, DF_RB " retry send object to tgt_id %d\n", DP_RB_RPT(rpt),
144+
arg->tgt_id);
142145
dss_sleep(daos_rpc_rand_delay(max_delay) << 10);
143146
}
144147
out:
@@ -360,8 +363,10 @@ rebuild_objects_send_ult(void *data)
360363
D_FREE(ephs);
361364
if (punched_ephs != NULL)
362365
D_FREE(punched_ephs);
363-
if (rc != 0 && tls->rebuild_pool_status == 0)
366+
if (rc != 0 && tls->rebuild_pool_status == 0) {
367+
DL_ERROR(rc, DF_RB " set rebuild_pool_status as failed", DP_RB_RPT(rpt));
364368
tls->rebuild_pool_status = rc;
369+
}
365370

366371
rpt_put(rpt);
367372
}
@@ -1305,6 +1310,7 @@ rebuild_tgt_scan_handler(crt_rpc_t *rpc)
13051310
rpt->rt_re_report = 1;
13061311

13071312
rpt->rt_leader_rank = rsi->rsi_master_rank;
1313+
rpt->rt_rebuild_gen = rsi->rsi_rebuild_gen;
13081314

13091315
/* If this is the old leader, then also stop the rebuild tracking ULT. */
13101316
rebuild_leader_stop(rsi->rsi_pool_uuid, rsi->rsi_rebuild_ver,

src/rebuild/srv.c

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1459,7 +1459,8 @@ rebuild_leader_start(struct ds_pool *pool, struct rebuild_task *task,
14591459
*/
14601460
ds_rebuild_running_query_adv(pool->sp_uuid, -1, &version, NULL, &generation,
14611461
&rebuild_leader_rank, &rebuild_leader_term);
1462-
if ((version < task->dst_map_ver) ||
1462+
if (task->dst_rebuild_op == RB_OP_RECLAIM || task->dst_rebuild_op == RB_OP_FAIL_RECLAIM ||
1463+
(version < task->dst_map_ver) ||
14631464
(version == task->dst_map_ver && leader_rank == rebuild_leader_rank &&
14641465
leader_term == rebuild_leader_term))
14651466
generation = ++pool->sp_rebuild_gen;
@@ -1806,6 +1807,8 @@ rebuild_task_ult(void *arg)
18061807
* rebuild.
18071808
*/
18081809
if (rgt && rgt->rgt_init_scan) {
1810+
struct rebuild_tgt_pool_tracker *local_rpt;
1811+
18091812
if (myrank != pool->sp_iv_ns->iv_master_rank) {
18101813
/* If master has been changed, then let's skip
18111814
* iv sync, and the new leader will take over
@@ -1817,6 +1820,14 @@ rebuild_task_ult(void *arg)
18171820
}
18181821

18191822
rebuild_leader_status_notify(rgt, pool, task->dst_rebuild_op, myrank);
1823+
1824+
local_rpt = rpt_lookup(pool->sp_uuid, task->dst_rebuild_op, rgt->rgt_rebuild_ver,
1825+
rgt->rgt_rebuild_gen);
1826+
if (local_rpt) {
1827+
local_rpt->rt_abort = 1;
1828+
D_INFO(DF_RB " set rt_abort", DP_RB_RPT(local_rpt));
1829+
rpt_put(local_rpt);
1830+
}
18201831
}
18211832

18221833
try_reschedule:
@@ -2385,8 +2396,7 @@ rebuild_tgt_fini(struct rebuild_tgt_pool_tracker *rpt)
23852396
/* destroy the migrate_tls of 0-xstream */
23862397
ds_migrate_stop(rpt->rt_pool, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen);
23872398
/* No one should access rpt after rebuild_fini_one. */
2388-
D_INFO("Finalized rebuild for "DF_UUID", map_ver=%u.\n",
2389-
DP_UUID(rpt->rt_pool_uuid), rpt->rt_rebuild_ver);
2399+
DL_INFO(rc, DF_RB " Finalized rebuild", DP_RB_RPT(rpt));
23902400
rpt_delete(rpt);
23912401
}
23922402

@@ -2515,8 +2525,12 @@ rebuild_tgt_status_check_ult(void *arg)
25152525
* it can not find the IV see crt_iv_hdlr_xx().
25162526
* let's just stop the rebuild.
25172527
*/
2518-
if (rc == -DER_NONEXIST && !status.rebuilding)
2528+
if (rc == -DER_NONEXIST && !status.rebuilding) {
2529+
D_INFO(DF_RB ", rc %d, status.rebuilding %d, "
2530+
"set rt_global_done",
2531+
DP_RB_RPT(rpt), rc, status.rebuilding);
25192532
rpt->rt_global_done = 1;
2533+
}
25202534

25212535
if (ns->iv_stop) {
25222536
D_DEBUG(DB_REBUILD, "abort rebuild "

utils/rpms/daos.spec

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
%else
1111
%global daos_build_args client test
1212
%endif
13-
%global mercury_version 2.4.1-2
14-
%global mercury_version_next 2.4.1-3
13+
%global mercury_version 2.4.1
14+
%global mercury_version_max 2.4.1-2
1515
%global libfabric_version 1.20
1616
%global argobots_version 1.2-3
1717
%global __python %{__python3}
@@ -38,7 +38,7 @@ BuildRequires: scons >= 2.4
3838
%endif
3939
BuildRequires: libfabric-devel >= %{libfabric_version}
4040
BuildRequires: mercury-devel >= %{mercury_version}
41-
BuildRequires: mercury-devel < %{mercury_version_next}
41+
BuildRequires: mercury-devel <= %{mercury_version_max}
4242
BuildRequires: gcc-c++
4343
%if (0%{?rhel} >= 8)
4444
%global openmpi openmpi
@@ -138,6 +138,7 @@ Requires: mercury-libfabric >= %{mercury_version}
138138
# Runtime upper bound mirrors the BuildRequires bound on mercury-devel and uses
# the mercury_version_max macro defined at the top of this spec.
Requires: mercury-libfabric <= %{mercury_version_max}
139139

140140

141+
141142
%description
142143
The Distributed Asynchronous Object Storage (DAOS) is an open-source
143144
software-defined object store designed from the ground up for

0 commit comments

Comments
 (0)