@@ -109,6 +109,7 @@ const (
109
109
readyPercentThreshold = 0.9
110
110
111
111
DowngradeEnabledPath = "/downgrade/enabled"
112
+ memorySnapshotCount = 10
112
113
)
113
114
114
115
var (
@@ -291,9 +292,10 @@ type EtcdServer struct {
291
292
clusterVersionChanged * notify.Notifier
292
293
293
294
* AccessController
294
- // forceSnapshot can force snapshot be triggered after apply, independent of the snapshotCount.
295
+ // forceDiskSnapshot forces a disk snapshot to be triggered after apply, independent of the snapshotCount.
295
296
// Should only be set within apply code path. Used to force snapshot after cluster version downgrade.
296
- forceSnapshot bool
297
+ // TODO: Replace with flush db in v3.7 assuming v3.6 bootstraps from db file.
298
+ forceDiskSnapshot bool
297
299
corruptionChecker CorruptionChecker
298
300
}
299
301
@@ -741,10 +743,11 @@ func (s *EtcdServer) ReportSnapshot(id uint64, status raft.SnapshotStatus) {
741
743
}
742
744
743
745
type etcdProgress struct {
744
- confState raftpb.ConfState
745
- snapi uint64
746
- appliedt uint64
747
- appliedi uint64
746
+ confState raftpb.ConfState
747
+ diskSnapshotIndex uint64
748
+ memorySnapshotIndex uint64
749
+ appliedt uint64
750
+ appliedi uint64
748
751
}
749
752
750
753
// raftReadyHandler contains a set of EtcdServer operations to be called by raftNode,
@@ -809,10 +812,11 @@ func (s *EtcdServer) run() {
809
812
s .r .start (rh )
810
813
811
814
ep := etcdProgress {
812
- confState : sn .Metadata .ConfState ,
813
- snapi : sn .Metadata .Index ,
814
- appliedt : sn .Metadata .Term ,
815
- appliedi : sn .Metadata .Index ,
815
+ confState : sn .Metadata .ConfState ,
816
+ diskSnapshotIndex : sn .Metadata .Index ,
817
+ memorySnapshotIndex : sn .Metadata .Index ,
818
+ appliedt : sn .Metadata .Term ,
819
+ appliedi : sn .Metadata .Index ,
816
820
}
817
821
818
822
defer func () {
@@ -998,15 +1002,15 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
998
1002
lg := s .Logger ()
999
1003
lg .Info (
1000
1004
"applying snapshot" ,
1001
- zap .Uint64 ("current-snapshot-index" , ep .snapi ),
1005
+ zap .Uint64 ("current-snapshot-index" , ep .diskSnapshotIndex ),
1002
1006
zap .Uint64 ("current-applied-index" , ep .appliedi ),
1003
1007
zap .Uint64 ("incoming-leader-snapshot-index" , toApply .snapshot .Metadata .Index ),
1004
1008
zap .Uint64 ("incoming-leader-snapshot-term" , toApply .snapshot .Metadata .Term ),
1005
1009
)
1006
1010
defer func () {
1007
1011
lg .Info (
1008
1012
"applied snapshot" ,
1009
- zap .Uint64 ("current-snapshot-index" , ep .snapi ),
1013
+ zap .Uint64 ("current-snapshot-index" , ep .diskSnapshotIndex ),
1010
1014
zap .Uint64 ("current-applied-index" , ep .appliedi ),
1011
1015
zap .Uint64 ("incoming-leader-snapshot-index" , toApply .snapshot .Metadata .Index ),
1012
1016
zap .Uint64 ("incoming-leader-snapshot-term" , toApply .snapshot .Metadata .Term ),
@@ -1017,7 +1021,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
1017
1021
if toApply .snapshot .Metadata .Index <= ep .appliedi {
1018
1022
lg .Panic (
1019
1023
"unexpected leader snapshot from outdated index" ,
1020
- zap .Uint64 ("current-snapshot-index" , ep .snapi ),
1024
+ zap .Uint64 ("current-snapshot-index" , ep .diskSnapshotIndex ),
1021
1025
zap .Uint64 ("current-applied-index" , ep .appliedi ),
1022
1026
zap .Uint64 ("incoming-leader-snapshot-index" , toApply .snapshot .Metadata .Index ),
1023
1027
zap .Uint64 ("incoming-leader-snapshot-term" , toApply .snapshot .Metadata .Term ),
@@ -1132,7 +1136,8 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
1132
1136
1133
1137
ep .appliedt = toApply .snapshot .Metadata .Term
1134
1138
ep .appliedi = toApply .snapshot .Metadata .Index
1135
- ep .snapi = ep .appliedi
1139
+ ep .diskSnapshotIndex = ep .appliedi
1140
+ ep .memorySnapshotIndex = ep .appliedi
1136
1141
ep .confState = toApply .snapshot .Metadata .ConfState
1137
1142
1138
1143
// As backends and implementations like alarmsStore changed, we need
@@ -1188,31 +1193,37 @@ func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *toApply) {
1188
1193
}
1189
1194
1190
1195
// ForceSnapshot requests that the next snapshot check take a disk snapshot
// regardless of the configured snapshot count. The request is consumed (the
// flag is cleared) when triggerSnapshot performs the disk snapshot.
func (s *EtcdServer) ForceSnapshot() {
	s.forceDiskSnapshot = true
}
1193
1198
1194
1199
func (s * EtcdServer ) triggerSnapshot (ep * etcdProgress ) {
1195
- if ! s .shouldSnapshot (ep ) {
1200
+ if ! s .shouldSnapshotToDisk (ep ) {
1201
+ if ep .appliedi > ep .memorySnapshotIndex + memorySnapshotCount {
1202
+ s .snapshotToMemory (ep .appliedi , ep .confState )
1203
+ s .compactRaftLog (ep .appliedi )
1204
+ ep .memorySnapshotIndex = ep .appliedi
1205
+ }
1196
1206
return
1197
1207
}
1208
+ //TODO: Remove disk snapshot in v3.7
1198
1209
lg := s .Logger ()
1199
1210
lg .Info (
1200
1211
"triggering snapshot" ,
1201
1212
zap .String ("local-member-id" , s .MemberID ().String ()),
1202
1213
zap .Uint64 ("local-member-applied-index" , ep .appliedi ),
1203
- zap .Uint64 ("local-member-snapshot-index" , ep .snapi ),
1214
+ zap .Uint64 ("local-member-snapshot-index" , ep .diskSnapshotIndex ),
1204
1215
zap .Uint64 ("local-member-snapshot-count" , s .Cfg .SnapshotCount ),
1205
- zap .Bool ("snapshot-forced" , s .forceSnapshot ),
1216
+ zap .Bool ("snapshot-forced" , s .forceDiskSnapshot ),
1206
1217
)
1207
- s .forceSnapshot = false
1218
+ s .forceDiskSnapshot = false
1208
1219
1209
- s .snapshot (ep .appliedi , ep .confState )
1220
+ s .snapshotToDisk (ep .appliedi , ep .confState )
1210
1221
s .compactRaftLog (ep .appliedi )
1211
- ep .snapi = ep .appliedi
1222
+ ep .diskSnapshotIndex = ep .appliedi
1212
1223
}
1213
1224
1214
- func (s * EtcdServer ) shouldSnapshot (ep * etcdProgress ) bool {
1215
- return (s .forceSnapshot && ep .appliedi != ep .snapi ) || (ep .appliedi - ep .snapi > s .Cfg .SnapshotCount )
1225
+ func (s * EtcdServer ) shouldSnapshotToDisk (ep * etcdProgress ) bool {
1226
+ return (s .forceDiskSnapshot && ep .appliedi != ep .diskSnapshotIndex ) || (ep .appliedi - ep .diskSnapshotIndex > s .Cfg .SnapshotCount )
1216
1227
}
1217
1228
1218
1229
func (s * EtcdServer ) hasMultipleVotingMembers () bool {
@@ -2132,7 +2143,7 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
2132
2143
}
2133
2144
2134
2145
// TODO: non-blocking snapshot
2135
- func (s * EtcdServer ) snapshot (snapi uint64 , confState raftpb.ConfState ) {
2146
+ func (s * EtcdServer ) snapshotToDisk (snapi uint64 , confState raftpb.ConfState ) {
2136
2147
d := GetMembershipInfoInV2Format (s .Logger (), s .cluster )
2137
2148
// commit kv to write metadata (for example: consistent index) to disk.
2138
2149
//
@@ -2174,6 +2185,25 @@ func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
2174
2185
)
2175
2186
}
2176
2187
2188
+ func (s * EtcdServer ) snapshotToMemory (snapi uint64 , confState raftpb.ConfState ) {
2189
+ d := GetMembershipInfoInV2Format (s .Logger (), s .cluster )
2190
+
2191
+ lg := s .Logger ()
2192
+
2193
+ // For backward compatibility, generate v2 snapshot from v3 state.
2194
+ snap , err := s .r .raftStorage .CreateSnapshot (snapi , & confState , d )
2195
+ if err != nil {
2196
+ // the snapshot was done asynchronously with the progress of raft.
2197
+ // raft might have already got a newer snapshot.
2198
+ if errorspkg .Is (err , raft .ErrSnapOutOfDate ) {
2199
+ return
2200
+ }
2201
+ lg .Panic ("failed to create snapshot" , zap .Error (err ))
2202
+ }
2203
+
2204
+ verifyConsistentIndexIsLatest (lg , snap , s .consistIndex .ConsistentIndex ())
2205
+ }
2206
+
2177
2207
func (s * EtcdServer ) compactRaftLog (snapi uint64 ) {
2178
2208
lg := s .Logger ()
2179
2209
@@ -2189,10 +2219,10 @@ func (s *EtcdServer) compactRaftLog(snapi uint64) {
2189
2219
2190
2220
// keep some in memory log entries for slow followers.
2191
2221
compacti := uint64 (1 )
2192
- if snapi > s .Cfg .SnapshotCatchUpEntries {
2193
- compacti = snapi - s . Cfg . SnapshotCatchUpEntries
2222
+ if snapi <= s .Cfg .SnapshotCatchUpEntries {
2223
+ return
2194
2224
}
2195
-
2225
+ compacti = snapi - s . Cfg . SnapshotCatchUpEntries
2196
2226
err := s .r .raftStorage .Compact (compacti )
2197
2227
if err != nil {
2198
2228
// the compaction was done asynchronously with the progress of raft.
0 commit comments