Skip to content

Commit df49a51

Browse files
committed
Create a v2 snapshot when running etcdutl migrate command
Also added test to cover the etcdutl migrate command Signed-off-by: Benjamin Wang <[email protected]>
1 parent ad11af7 commit df49a51

File tree

4 files changed

+192
-29
lines changed

4 files changed

+192
-29
lines changed

etcdutl/etcdutl/common.go

+83
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,19 @@ package etcdutl
1616

1717
import (
1818
"errors"
19+
"fmt"
1920

2021
"go.uber.org/zap"
2122
"go.uber.org/zap/zapcore"
2223

2324
"go.etcd.io/etcd/client/pkg/v3/logutil"
2425
"go.etcd.io/etcd/pkg/v3/cobrautl"
26+
"go.etcd.io/etcd/server/v3/etcdserver"
27+
"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
2528
"go.etcd.io/etcd/server/v3/etcdserver/api/snap"
29+
"go.etcd.io/etcd/server/v3/storage/backend"
2630
"go.etcd.io/etcd/server/v3/storage/datadir"
31+
"go.etcd.io/etcd/server/v3/storage/schema"
2732
"go.etcd.io/etcd/server/v3/storage/wal"
2833
"go.etcd.io/etcd/server/v3/storage/wal/walpb"
2934
"go.etcd.io/raft/v3/raftpb"
@@ -68,3 +73,81 @@ func getLatestV2Snapshot(lg *zap.Logger, dataDir string) (*raftpb.Snapshot, erro
6873

6974
return snapshot, nil
7075
}
76+
77+
func createV2SnapshotFromV3Store(dataDir string, be backend.Backend) error {
78+
var (
79+
lg = GetLogger()
80+
81+
snapDir = datadir.ToSnapDir(dataDir)
82+
walDir = datadir.ToWALDir(dataDir)
83+
)
84+
85+
ci, term := schema.ReadConsistentIndex(be.ReadTx())
86+
87+
cl := membership.NewCluster(lg)
88+
cl.SetBackend(schema.NewMembershipBackend(lg, be))
89+
cl.UnsafeLoad()
90+
91+
latestWALSnap, err := getLatestWALSnap(lg, dataDir)
92+
if err != nil {
93+
return err
94+
}
95+
96+
// Each time before creating the v2 snapshot, etcdserve always flush
97+
// the backend storage (bbolt db), so the consistent index should never
98+
// less than the Index or term of the latest snapshot.
99+
if ci < latestWALSnap.Index || term < latestWALSnap.Term {
100+
// This should never happen
101+
return fmt.Errorf("consistent_index [Index: %d, Term: %d] is less than the latest snapshot [Index: %d, Term: %d]", ci, term, latestWALSnap.Index, latestWALSnap.Term)
102+
}
103+
104+
voters, learners := getVotersAndLearners(cl)
105+
confState := raftpb.ConfState{
106+
Voters: voters,
107+
Learners: learners,
108+
}
109+
110+
// create the v2 snaspshot file
111+
raftSnap := raftpb.Snapshot{
112+
Data: etcdserver.GetMembershipInfoInV2Format(lg, cl),
113+
Metadata: raftpb.SnapshotMetadata{
114+
Index: ci,
115+
Term: term,
116+
ConfState: confState,
117+
},
118+
}
119+
sn := snap.New(lg, snapDir)
120+
if err = sn.SaveSnap(raftSnap); err != nil {
121+
return err
122+
}
123+
124+
// save WAL snapshot record
125+
w, err := wal.Open(lg, walDir, latestWALSnap)
126+
if err != nil {
127+
return err
128+
}
129+
defer w.Close()
130+
// We must read all records to locate the tail of the last valid WAL file.
131+
if _, _, _, err = w.ReadAll(); err != nil {
132+
return err
133+
}
134+
135+
return w.SaveSnapshot(walpb.Snapshot{Index: ci, Term: term, ConfState: &confState})
136+
}
137+
138+
func getVotersAndLearners(cl *membership.RaftCluster) ([]uint64, []uint64) {
139+
var (
140+
voters []uint64
141+
learners []uint64
142+
)
143+
for _, m := range cl.Members() {
144+
if m.IsLearner {
145+
learners = append(learners, uint64(m.ID))
146+
continue
147+
}
148+
149+
voters = append(voters, uint64(m.ID))
150+
}
151+
152+
return voters, learners
153+
}

etcdutl/etcdutl/migrate_command.go

+48-19
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ func (o *migrateOptions) AddFlags(cmd *cobra.Command) {
7474

7575
func (o *migrateOptions) Config() (*migrateConfig, error) {
7676
c := &migrateConfig{
77-
force: o.force,
78-
lg: GetLogger(),
77+
force: o.force,
78+
dataDir: o.dataDir,
79+
lg: GetLogger(),
7980
}
8081
var err error
8182
dotCount := strings.Count(o.targetVersion, ".")
@@ -90,47 +91,73 @@ func (o *migrateOptions) Config() (*migrateConfig, error) {
9091
return nil, fmt.Errorf(`target version %q not supported. Minimal "3.5"`, storageVersionToString(c.targetVersion))
9192
}
9293

93-
dbPath := datadir.ToBackendFileName(o.dataDir)
94-
c.be = backend.NewDefaultBackend(GetLogger(), dbPath)
94+
return c, nil
95+
}
96+
97+
type migrateConfig struct {
98+
lg *zap.Logger
99+
be backend.Backend
100+
targetVersion *semver.Version
101+
walVersion schema.WALVersion
102+
dataDir string
103+
force bool
104+
}
95105

96-
walPath := datadir.ToWALDir(o.dataDir)
97-
walSnap, err := getLatestWALSnap(c.lg, o.dataDir)
106+
func (c *migrateConfig) finalize() error {
107+
walPath := datadir.ToWALDir(c.dataDir)
108+
walSnap, err := getLatestWALSnap(c.lg, c.dataDir)
98109
if err != nil {
99-
return nil, fmt.Errorf("failed to get the lastest snapshot: %w", err)
110+
return fmt.Errorf("failed to get the lastest snapshot: %w", err)
100111
}
101112
w, err := wal.OpenForRead(c.lg, walPath, walSnap)
102113
if err != nil {
103-
return nil, fmt.Errorf(`failed to open wal: %w`, err)
114+
return fmt.Errorf(`failed to open wal: %w`, err)
104115
}
105116
defer w.Close()
106117
c.walVersion, err = wal.ReadWALVersion(w)
107118
if err != nil {
108-
return nil, fmt.Errorf(`failed to read wal: %w`, err)
119+
return fmt.Errorf(`failed to read wal: %w`, err)
109120
}
110121

111-
return c, nil
112-
}
113-
114-
type migrateConfig struct {
115-
lg *zap.Logger
116-
be backend.Backend
117-
targetVersion *semver.Version
118-
walVersion schema.WALVersion
119-
force bool
122+
return nil
120123
}
121124

122125
func migrateCommandFunc(c *migrateConfig) error {
126+
dbPath := datadir.ToBackendFileName(c.dataDir)
127+
c.be = backend.NewDefaultBackend(GetLogger(), dbPath)
123128
defer c.be.Close()
129+
124130
tx := c.be.BatchTx()
125131
current, err := schema.DetectSchemaVersion(c.lg, c.be.ReadTx())
126132
if err != nil {
127-
c.lg.Error("failed to detect storage version. Please make sure you are using data dir from etcd v3.5 and older")
133+
c.lg.Error("failed to detect storage version. Please make sure you are using data dir from etcd v3.5 and older", zap.Error(err))
128134
return err
129135
}
130136
if current == *c.targetVersion {
131137
c.lg.Info("storage version up-to-date", zap.String("storage-version", storageVersionToString(&current)))
132138
return nil
133139
}
140+
141+
// only generate a v2 snapshot file for downgrade case
142+
if c.targetVersion.LessThan(current) {
143+
// Update cluster version
144+
be := schema.NewMembershipBackend(c.lg, c.be)
145+
be.MustSaveClusterVersionToBackend(c.targetVersion)
146+
147+
// forcibly create a v2 snapshot file
148+
// TODO: remove in 3.8
149+
if err = createV2SnapshotFromV3Store(c.dataDir, c.be); err != nil {
150+
c.lg.Error("Failed to create v2 snapshot file", zap.Error(err))
151+
return err
152+
}
153+
c.lg.Info("Generated a v2 snapshot file")
154+
}
155+
156+
if err = c.finalize(); err != nil {
157+
c.lg.Error("Failed to finalize config", zap.Error(err))
158+
return err
159+
}
160+
134161
err = schema.Migrate(c.lg, tx, c.walVersion, *c.targetVersion)
135162
if err != nil {
136163
if !c.force {
@@ -139,7 +166,9 @@ func migrateCommandFunc(c *migrateConfig) error {
139166
c.lg.Info("normal migrate failed, trying with force", zap.Error(err))
140167
migrateForce(c.lg, tx, c.targetVersion)
141168
}
169+
142170
c.be.ForceCommit()
171+
143172
return nil
144173
}
145174

server/etcdserver/api/membership/cluster.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -256,22 +256,28 @@ func (c *RaftCluster) SetVersionChangedNotifier(n *notify.Notifier) {
256256
c.versionChanged = n
257257
}
258258

259-
func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
260-
c.Lock()
261-
defer c.Unlock()
262-
259+
func (c *RaftCluster) UnsafeLoad() {
263260
if c.be != nil {
264261
c.version = c.be.ClusterVersionFromBackend()
265262
c.members, c.removed = c.be.MustReadMembersFromBackend()
266263
} else {
267264
c.version = clusterVersionFromStore(c.lg, c.v2store)
268265
c.members, c.removed = membersFromStore(c.lg, c.v2store)
269266
}
270-
c.buildMembershipMetric()
271267

272268
if c.be != nil {
273269
c.downgradeInfo = c.be.DowngradeInfoFromBackend()
274270
}
271+
}
272+
273+
func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
274+
c.Lock()
275+
defer c.Unlock()
276+
277+
c.UnsafeLoad()
278+
279+
c.buildMembershipMetric()
280+
275281
sv := semver.Must(semver.NewVersion(version.Version))
276282
if c.downgradeInfo != nil && c.downgradeInfo.Enabled {
277283
c.lg.Info(

tests/e2e/utl_migrate_test.go

+50-5
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ func TestEtctlutlMigrate(t *testing.T) {
4949

5050
expectLogsSubString string
5151
expectStorageVersion *semver.Version
52+
expectTargetBinary string
5253
}{
5354
{
5455
name: "Invalid target version string",
@@ -81,23 +82,25 @@ func TestEtctlutlMigrate(t *testing.T) {
8182
{
8283
name: "Migrate v3.5 to v3.5 is no-op",
8384
clusterVersion: e2e.LastVersion,
84-
clusterSize: 1,
8585
targetVersion: "3.5",
86+
clusterSize: 1,
8687
expectLogsSubString: "storage version up-to-date\t" + `{"storage-version": "3.5"}`,
8788
},
8889
{
8990
name: "Upgrade 1 member cluster from v3.5 to v3.6 should work",
9091
clusterVersion: e2e.LastVersion,
91-
clusterSize: 1,
9292
targetVersion: "3.6",
93+
clusterSize: 1,
9394
expectStorageVersion: &version.V3_6,
95+
expectTargetBinary: e2e.BinPath.Etcd,
9496
},
9597
{
9698
name: "Upgrade 3 member cluster from v3.5 to v3.6 should work",
9799
clusterVersion: e2e.LastVersion,
98-
clusterSize: 3,
99100
targetVersion: "3.6",
101+
clusterSize: 3,
100102
expectStorageVersion: &version.V3_6,
103+
expectTargetBinary: e2e.BinPath.Etcd,
101104
},
102105
{
103106
name: "Migrate v3.6 to v3.6 is no-op",
@@ -112,13 +115,15 @@ func TestEtctlutlMigrate(t *testing.T) {
112115
clusterSize: 1,
113116
expectLogsSubString: "updated storage version",
114117
expectStorageVersion: nil, // 3.5 doesn't have the field `storageVersion`, so it returns nil.
118+
expectTargetBinary: e2e.BinPath.EtcdLastRelease,
115119
},
116120
{
117121
name: "Downgrade 3 member cluster from v3.6 to v3.5 should work",
118122
targetVersion: "3.5",
119123
clusterSize: 3,
120124
expectLogsSubString: "updated storage version",
121125
expectStorageVersion: nil, // 3.5 doesn't have the field `storageVersion`, so it returns nil.
126+
expectTargetBinary: e2e.BinPath.EtcdLastRelease,
122127
},
123128
{
124129
name: "Upgrade v3.6 to v3.7 with force should work",
@@ -141,7 +146,7 @@ func TestEtctlutlMigrate(t *testing.T) {
141146
epc, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
142147
e2e.WithVersion(tc.clusterVersion),
143148
e2e.WithDataDirPath(dataDirPath),
144-
e2e.WithClusterSize(1),
149+
e2e.WithClusterSize(tc.clusterSize),
145150
e2e.WithKeepDataDir(true),
146151
// Set low SnapshotCount to ensure wal snapshot is done
147152
e2e.WithSnapshotCount(1),
@@ -163,7 +168,7 @@ func TestEtctlutlMigrate(t *testing.T) {
163168
require.NoError(t, e2e.SpawnWithExpect(append(prefixArgs, "put", fmt.Sprintf("%d", i), "value"), expect.ExpectedResponse{Value: "OK"}))
164169
}
165170

166-
t.Log("Stopping the the members")
171+
t.Log("Stopping all the servers")
167172
for i := 0; i < len(epc.Procs); i++ {
168173
t.Logf("Stopping server %d: %v", i, epc.Procs[i].EndpointsGRPC())
169174
err = epc.Procs[i].Stop()
@@ -190,6 +195,46 @@ func TestEtctlutlMigrate(t *testing.T) {
190195
assert.Equal(t, tc.expectStorageVersion, ver)
191196
be.Close()
192197
}
198+
199+
if len(tc.expectTargetBinary) == 0 || !fileutil.Exist(tc.expectTargetBinary) {
200+
return
201+
}
202+
203+
t.Log("Start all members with new binary")
204+
for i := 0; i < len(epc.Procs); i++ {
205+
t.Logf("Replace binary for member %d: %v", i, epc.Procs[i].EndpointsGRPC())
206+
member := epc.Procs[i]
207+
member.Config().ExecPath = tc.expectTargetBinary
208+
}
209+
require.NoError(t, epc.Start(context.TODO()))
210+
211+
t.Log("Verify the versions of all members")
212+
for i := 0; i < len(epc.Procs); i++ {
213+
t.Logf("Verify the version of member %d: %v", i, epc.Procs[i].EndpointsGRPC())
214+
expectedVersion := tc.expectStorageVersion
215+
if expectedVersion == nil {
216+
expectedVersion = &version.V3_5
217+
}
218+
219+
verifyVersion(t, epc, epc.Procs[i], expectedVersion, expectedVersion)
220+
}
193221
})
194222
}
195223
}
224+
225+
func verifyVersion(t *testing.T, clus *e2e.EtcdProcessCluster, member e2e.EtcdProcess, expectedServerVersion, expectedClusterVersion *semver.Version) error {
226+
var err error
227+
expected := fmt.Sprintf(`"etcdserver":"%d.%d\..*"etcdcluster":"%d\.%d\.`, expectedServerVersion.Major, expectedServerVersion.Minor, expectedClusterVersion.Major, expectedClusterVersion.Minor)
228+
for i := 0; i < 35; i++ {
229+
if err = e2e.CURLGetFromMember(clus, member, e2e.CURLReq{Endpoint: "/version", Expected: expect.ExpectedResponse{Value: expected, IsRegularExpr: true}}); err != nil {
230+
t.Logf("#%d: v3 is not ready yet (%v)", i, err)
231+
time.Sleep(200 * time.Millisecond)
232+
continue
233+
}
234+
break
235+
}
236+
if err != nil {
237+
return fmt.Errorf("failed to verify version, expected %v got (%w)", expected, err)
238+
}
239+
return nil
240+
}

0 commit comments

Comments
 (0)