Skip to content

Commit 9a607a8

Browse files
committed
ovn db: join cluster with raft header backup (#6106)
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
1 parent dabfbb9 commit 9a607a8

File tree

4 files changed

+258
-28
lines changed

4 files changed

+258
-28
lines changed

dist/images/Dockerfile.base

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ ADD patches/f627b7721ec282f2edaf798913b1559b939687f0.patch $SRC_DIR
4545
ADD patches/3f3e3a436ff5eb2eaafbeeae8ea9dc0c514fe8a3.patch $SRC_DIR
4646
ADD patches/a6cb8215a80635129e4fada4c0d25c25fb746bf7.patch $SRC_DIR
4747
ADD patches/d4d76ddb2e12cdd9e73bb5e008ebb9fd1b4d6ca6.patch $SRC_DIR
48+
ADD patches/53b9837f7fe89cd63af735b039b8c26107d6579d.patch $SRC_DIR
4849
ADD patches/ffd2328d4a55271569e2b89e54a2c18f4e186af8.patch $SRC_DIR
4950
ADD patches/d088c5d8c263552c5a31d87813991aee30ab74de.patch $SRC_DIR
5051
ADD patches/1b31f07dc60c016153fa35d936cdda0e02e58492.patch $SRC_DIR
@@ -87,6 +88,8 @@ RUN cd /usr/src/ && \
8788
git apply $SRC_DIR/a6cb8215a80635129e4fada4c0d25c25fb746bf7.patch && \
8889
# ovsdb-tool: add command fix-cluster
8990
git apply $SRC_DIR/d4d76ddb2e12cdd9e73bb5e008ebb9fd1b4d6ca6.patch && \
91+
# ovsdb-tool: add commands db-raft-header/rejoin-cluster
92+
git apply $SRC_DIR/53b9837f7fe89cd63af735b039b8c26107d6579d.patch && \
9093
# netdev: reduce cpu utilization for getting device addresses
9194
git apply $SRC_DIR/ffd2328d4a55271569e2b89e54a2c18f4e186af8.patch && \
9295
# ovs-router: skip getting source address for kube-ipvs0
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
From 53b9837f7fe89cd63af735b039b8c26107d6579d Mon Sep 17 00:00:00 2001
2+
From: zhangzujian <zhangzujian.7@gmail.com>
3+
Date: Fri, 26 Dec 2025 08:24:04 +0000
4+
Subject: [PATCH] ovsdb-tool: add commands db-raft-header/rejoin-cluster
5+
6+
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
7+
---
8+
ovsdb/ovsdb-tool.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++
9+
1 file changed, 93 insertions(+)
10+
11+
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
12+
index bfd54a97f..9ec2c5d48 100644
13+
--- a/ovsdb/ovsdb-tool.c
14+
+++ b/ovsdb/ovsdb-tool.c
15+
@@ -358,6 +358,63 @@ do_join_cluster(struct ovs_cmdl_context *ctx)
16+
sset_destroy(&remote_addrs);
17+
}
18+
19+
+static void
20+
+do_rejoin_cluster(struct ovs_cmdl_context *ctx)
21+
+{
22+
+ const char *db_file_name = ctx->argv[1];
23+
+ const char *hdr_file_name = ctx->argv[2];
24+
+ const char *local = ctx->argv[3];
25+
+ struct json *remote_addrs = json_array_create_empty();
26+
+ for (size_t i = 4; i < ctx->argc; i++) {
27+
+ json_array_add(remote_addrs, json_string_create(ctx->argv[i]));
28+
+ }
29+
+
30+
+ FILE *fp = fopen(hdr_file_name, "r");
31+
+ if (fp == NULL) {
32+
+ ovs_fatal(errno, "failed to open %s for reading", hdr_file_name);
33+
+ }
34+
+
35+
+ char data[4096];
36+
+ size_t n = fread(data, 1, sizeof data, fp);
37+
+ if (n == 0 && ferror(fp)) {
38+
+ ovs_fatal(errno, "failed to read from %s", hdr_file_name);
39+
+ }
40+
+ fclose(fp);
41+
+ if (n == sizeof data) {
42+
+ ovs_fatal(0, "%s: header file too large", hdr_file_name);
43+
+ }
44+
+ data[n] = '\0';
45+
+ struct json *hdr = parse_json(data);
46+
+ if (hdr->type != JSON_OBJECT) {
47+
+ ovs_fatal(0, "%s: expected JSON object", hdr_file_name);
48+
+ }
49+
+ json_object_put(hdr, "remote_addresses", remote_addrs);
50+
+
51+
+ struct raft_header h;
52+
+ check_ovsdb_error(raft_header_from_json(&h, hdr));
53+
+ json_destroy(hdr);
54+
+
55+
+ if (!h.name || !*h.name) {
56+
+ ovs_fatal(0, "%s: missing or empty database name", hdr_file_name);
57+
+ }
58+
+ if (!ovsdb_parser_is_id(h.name)) {
59+
+ ovs_fatal(0, "%s: not a valid schema name", h.name);
60+
+ }
61+
+ if (uuid_is_zero(&h.cid)) {
62+
+ ovs_fatal(0, "%s: missing or zero cluster ID", hdr_file_name);
63+
+ }
64+
+ if (uuid_is_zero(&h.sid)) {
65+
+ ovs_fatal(0, "%s: missing or zero server ID", hdr_file_name);
66+
+ }
67+
+
68+
+ /* Create database file. */
69+
+ check_ovsdb_error(raft_join_cluster(db_file_name, h.name, local,
70+
+ &h.remote_addresses,
71+
+ uuid_is_zero(&h.cid) ? NULL : &h.cid,
72+
+ uuid_is_zero(&h.sid) ? NULL : &h.sid));
73+
+ raft_header_uninit(&h);
74+
+}
75+
+
76+
static struct ovsdb_error *
77+
write_standalone_db(const char *file_name, const char *comment,
78+
const struct ovsdb *db)
79+
@@ -584,6 +641,39 @@ do_db_local_address(struct ovs_cmdl_context *ctx)
80+
raft_metadata_destroy(&md);
81+
}
82+
83+
+static void
84+
+do_db_raft_header(struct ovs_cmdl_context *ctx)
85+
+{
86+
+ const char *db_file_name = ctx->argv[1];
87+
+ struct ovsdb_log *log;
88+
+
89+
+ check_ovsdb_error(ovsdb_log_open(db_file_name, OVSDB_MAGIC"|"RAFT_MAGIC,
90+
+ OVSDB_LOG_READ_ONLY, -1, &log));
91+
+ if (strcmp(ovsdb_log_get_magic(log), RAFT_MAGIC)) {
92+
+ ovs_fatal(0, "%s: not a clustered database", db_file_name);
93+
+ }
94+
+
95+
+ struct json *header;
96+
+ check_ovsdb_error(ovsdb_log_read(log, &header));
97+
+ ovsdb_log_close(log);
98+
+
99+
+ struct raft_header h;
100+
+ check_ovsdb_error(raft_header_from_json(&h, header));
101+
+ json_destroy(header);
102+
+ raft_entry_uninit(&h.snap);
103+
+ h.snap.data.full_json = NULL;
104+
+ h.snap.data.serialized = NULL;
105+
+ h.snap.servers = NULL;
106+
+ h.snap_index = 0;
107+
+
108+
+ header = raft_header_to_json(&h);
109+
+ raft_header_uninit(&h);
110+
+ char *s = json_to_string(header, JSSF_PRETTY);
111+
+ json_destroy(header);
112+
+ puts(s);
113+
+ free(s);
114+
+}
115+
+
116+
static void
117+
do_db_has_magic(struct ovs_cmdl_context *ctx, const char *magic)
118+
{
119+
@@ -2153,6 +2243,8 @@ static const struct ovs_cmdl_command all_commands[] = {
120+
{ "create-cluster", "db contents local", 3, 3, do_create_cluster, OVS_RW },
121+
{ "join-cluster", "db name local remote...", 4, INT_MAX, do_join_cluster,
122+
OVS_RW },
123+
+ { "rejoin-cluster", "db hdr local remote...", 4, INT_MAX, do_rejoin_cluster,
124+
+ OVS_RW },
125+
{ "compact", "[db [dst]]", 0, 2, do_compact, OVS_RW },
126+
{ "convert", "[db [schema [dst]]]", 0, 3, do_convert, OVS_RW },
127+
{ "needs-conversion", NULL, 0, 2, do_needs_conversion, OVS_RO },
128+
@@ -2162,6 +2254,7 @@ static const struct ovs_cmdl_command all_commands[] = {
129+
{ "db-cid", "db", 1, 1, do_db_cid, OVS_RO },
130+
{ "db-sid", "db", 1, 1, do_db_sid, OVS_RO },
131+
{ "db-local-address", "db", 1, 1, do_db_local_address, OVS_RO },
132+
+ { "db-raft-header", "db", 1, 1, do_db_raft_header, OVS_RO },
133+
{ "db-is-clustered", "db", 1, 1, do_db_is_clustered, OVS_RO },
134+
{ "db-is-standalone", "db", 1, 1, do_db_is_standalone, OVS_RO },
135+
{ "schema-name", "[schema]", 0, 1, do_schema_name, OVS_RO },
136+
--
137+
2.43.0
138+

dist/images/start-db.sh

Lines changed: 62 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ function ovndb_query_leader {
115115

116116
eval port="\$${db_eval}_PORT"
117117
query='["_Server",{"table":"Database","where":[["name","==","'$db'"]],"columns":["leader"],"op":"select"}]'
118-
if [[ "$ENABLE_SSL" == "false" ]]; then
119-
timeout 10 ovsdb-client query $(gen_conn_addr $i $port) "$query"
118+
if [[ "$ENABLE_SSL" == "false" ]]; then
119+
timeout 10 ovsdb-client query $(gen_conn_addr $2 $port) "$query"
120120
else
121-
timeout 10 ovsdb-client $SSL_OPTIONS query $(gen_conn_addr $i $port) "$query"
121+
timeout 10 ovsdb-client $SSL_OPTIONS query $(gen_conn_addr $2 $port) "$query"
122122
fi
123123
}
124124

@@ -128,7 +128,7 @@ function quit {
128128
}
129129

130130
function is_clustered {
131-
for i in $(echo -n "${NODE_IPS}" | sed 's/,/ /g'); do
131+
for i in $(echo -n "${NODE_IPS}" | sed 's/,/ /g'); do
132132
nb_leader=$(ovndb_query_leader nb $i)
133133
if [[ $nb_leader =~ "true" ]]; then
134134
return 0
@@ -172,9 +172,42 @@ function ovn_db_pre_start() {
172172
;;
173173
esac
174174

175+
eval port="\$${db_eval}_CLUSTER_PORT"
176+
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
177+
echo "local address: $local_addr"
178+
179+
local remote_addr=()
180+
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
181+
for node_ip in ${node_ips[*]}; do
182+
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
183+
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
184+
fi
185+
done
186+
echo "remote addresses: ${remote_addr[*]}"
187+
175188
local db_file="/etc/ovn/ovn${1}_db.db"
189+
local hdr_file="/etc/ovn/ovn${1}_db.hdr"
176190
if [ -e "$db_file" ]; then
191+
# check whether db file is corrupted
192+
db_name=$(ovsdb-tool db-name "$db_file" || true)
193+
if [ "$db_name" != "$db" ]; then
194+
# db file is corrupted and we cannot get the sid from it
195+
# we have a chance to rebuild it from raft header
196+
echo "ovn db file $db_file is corrupted, mv it away."
197+
local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
198+
echo "backup $db_file to $db_bak"
199+
mv "$db_file" "$db_bak" || return 1
200+
if [ ${#remote_addr[*]} -ne 0 -a -e "$hdr_file" ]; then
201+
# rebuild db file from raft header generated by the leader check process
202+
echo "generating new db file $db_file from raft header file $hdr_file"
203+
ovsdb-tool rejoin-cluster "$db_file" "$hdr_file" "$local_addr" ${remote_addr[*]}
204+
fi
205+
return
206+
fi
207+
208+
# check whether the db file is standalone or clustered
177209
if ovsdb-tool db-is-clustered "$db_file"; then
210+
# if the db file is clustered, check whether it has joined the cluster
178211
local msg=$(ovsdb-tool check-cluster "$db_file" 2>&1) || true
179212
if echo $msg | grep -q 'has not joined the cluster'; then
180213
local birth_time=$(stat --format=%W $db_file)
@@ -187,6 +220,7 @@ function ovn_db_pre_start() {
187220
fi
188221

189222
if ! ovsdb-tool check-cluster "$db_file"; then
223+
# clustered db file is corrupted
190224
local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
191225
echo "backup $db_file to $db_bak"
192226
cp "$db_file" "$db_bak" || return 1
@@ -199,39 +233,39 @@ function ovn_db_pre_start() {
199233
fixed=1
200234
fi
201235
fi
202-
if [ $fixed -ne 1 ]; then
236+
if [ $fixed -ne 1 -a ${#remote_addr[*]} -ne 0 ]; then
203237
echo "failed to fix database file $db_file, rebuild it."
238+
local db_new="$db_file.init-$(date +%s)-$(random_str)"
239+
if [ -e "$hdr_file" ]; then
240+
echo "generating new db file $db_new from raft header file $hdr_file"
241+
if ovsdb-tool rejoin-cluster "$db_new" "$hdr_file" "$local_addr" ${remote_addr[*]}; then
242+
echo "use new database file $db_new"
243+
mv "$db_new" "$db_file"
244+
return
245+
fi
246+
fi
247+
248+
# no raft header file, try to reuse the sid from the corrupted db file
204249
local sid=$(ovsdb-tool db-sid "$db_file")
205250
if ! echo -n "$sid" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
206251
echo "failed to get sid from db file $db_file"
207252
return 1
208253
fi
209-
echo "get local server id $sid"
210-
211-
eval port="\$${db_eval}_CLUSTER_PORT"
212-
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
213-
echo "local address: $local_addr"
214-
215-
local remote_addr=()
216-
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
217-
for node_ip in ${node_ips[*]}; do
218-
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
219-
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
220-
fi
221-
done
222-
echo "remote addresses: ${remote_addr[*]}"
223-
224-
local db_new="$db_file.init-$(date +%s)-$(random_str)"
225-
echo "generating new database file $db_new"
226-
if [ ${#remote_addr[*]} -ne 0 ]; then
227-
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1
228-
229-
echo "use new database file $db_new"
230-
mv "$db_new" "$db_file"
231-
fi
254+
echo "generating new database file $db_new with sid $sid"
255+
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1
256+
echo "use new database file $db_new"
257+
mv "$db_new" "$db_file"
232258
fi
233259
fi
234260
fi
261+
elif [ -e "$hdr_file" ]; then
262+
echo "db file $db_file is missing, while raft header file $hdr_file exists."
263+
if [ ${#remote_addr[*]} -ne 0 -a -e "$hdr_file" ]; then
264+
# rebuild db file from raft header generated by the leader check process
265+
echo "generating new db file $db_file from raft header file $hdr_file"
266+
ovsdb-tool rejoin-cluster "$db_file" "$hdr_file" "$local_addr" ${remote_addr[*]}
267+
fi
268+
return
235269
fi
236270

237271
# create local config

pkg/ovn_leader_checker/ovn.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package ovn_leader_checker
22

33
import (
4+
"bytes"
45
"context"
6+
"encoding/json"
57
"errors"
68
"flag"
79
"fmt"
@@ -363,6 +365,56 @@ func compactOvnDatabase(db string) {
363365
}
364366
}
365367

368+
// backupRaftHeader backs up the raft header of the ovn db file.
369+
// The backup file name is ovn<db>_db.hdr, e.g., ovnnb_db.hdr for ovnnb database file named ovnnb_db.db.
370+
// Example content of the header file:
371+
//
372+
// {
373+
// "server_id": "8d77699d-8dc6-4f32-b1ba-b66aad05ba46",
374+
// "name": "OVN_Northbound",
375+
// "local_address": "tcp:[172.18.0.2]:6643",
376+
// "cluster_id": "6d240b86-177e-4f17-aded-ed1b7b364d97"
377+
// }
378+
func backupRaftHeader(db string) {
379+
args := []string{"db-raft-header", fmt.Sprintf("/etc/ovn/ovn%s_db.db", db)}
380+
hdr, err := exec.Command("ovsdb-tool", args...).CombinedOutput() // #nosec G204
381+
if err != nil {
382+
klog.Errorf("failed to backup raft header of ovn%s database: error = %v, output = %s", db, err, string(hdr))
383+
return
384+
}
385+
386+
var data map[string]any
387+
if err = json.Unmarshal(hdr, &data); err != nil {
388+
klog.Errorf("failed to unmarshal raft header json content for ovn%s database: %v", db, err)
389+
return
390+
}
391+
392+
hdr, _ = json.MarshalIndent(data, "", " ")
393+
hdrFile := fmt.Sprintf("/etc/ovn/ovn%s_db.hdr", db)
394+
content, err := os.ReadFile(hdrFile)
395+
if err != nil {
396+
if !os.IsNotExist(err) {
397+
klog.Errorf("failed to read raft header file %s: %v", hdrFile, err)
398+
}
399+
klog.V(5).Infof("raft header file %s does not exist, created new one", hdrFile)
400+
}
401+
402+
if bytes.Equal(content, hdr) {
403+
klog.V(5).Infof("raft header file %s is up-to-date, no need to update", hdrFile)
404+
return
405+
}
406+
407+
klog.Infof("Found changes in raft header for ovn%s database, updating file %s", db, hdrFile)
408+
klog.Infof("Previous content of raft header file %s:\n%s", hdrFile, string(content))
409+
410+
if err = os.WriteFile(hdrFile, hdr, 0o600); err != nil {
411+
klog.Errorf("failed to write raft header file %s: %v", hdrFile, err)
412+
return
413+
}
414+
415+
klog.Infof("succeeded to backup raft header of ovn%s database to file %s with content:\n%s", db, hdrFile, string(hdr))
416+
}
417+
366418
func doOvnLeaderCheck(cfg *Configuration, podName, podNamespace string) {
367419
if podName == "" || podNamespace == "" {
368420
util.LogFatalAndExit(nil, "env variables POD_NAME and POD_NAMESPACE must be set")
@@ -409,6 +461,9 @@ func doOvnLeaderCheck(cfg *Configuration, podName, podNamespace string) {
409461
compactOvnDatabase("nb")
410462
compactOvnDatabase("sb")
411463
}
464+
465+
backupRaftHeader("nb")
466+
backupRaftHeader("sb")
412467
} else {
413468
icNbLeader := isDBLeader(cfg.localAddress, "OVN_IC_Northbound")
414469
icSbLeader := isDBLeader(cfg.localAddress, "OVN_IC_Southbound")

0 commit comments

Comments
 (0)