Skip to content

Commit c5127d8

Browse files
craig[bot]pav-kv
craig[bot]
andcommitted
Merge #145439
145439: kvserver: mv entries cache update out of logstore r=tbg a=pav-kv The current rough split between `LogStore` and `replicaLogStorage` is that the latter is responsible for the in-memory state and keeping it consistent with storage, and the former is just the storage part. This commit moves the code in that direction by placing the raft entries cache update in `replicaLogStorage`. Related to #136109 Epic: CRDB-46488 Co-authored-by: Pavel Kalinnikov <[email protected]>
2 parents 0b3d992 + e49742c commit c5127d8

7 files changed

+30
-26
lines changed

pkg/kv/kvserver/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,6 @@ go_test(
456456
"//pkg/kv/kvserver/protectedts/ptpb",
457457
"//pkg/kv/kvserver/protectedts/ptstorage",
458458
"//pkg/kv/kvserver/protectedts/ptutil",
459-
"//pkg/kv/kvserver/raftentry",
460459
"//pkg/kv/kvserver/raftlog",
461460
"//pkg/kv/kvserver/rafttrace",
462461
"//pkg/kv/kvserver/raftutil",

pkg/kv/kvserver/client_manual_proposal_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
2020
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
2121
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
22-
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry"
2322
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog"
2423
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
2524
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/uncertainty"
@@ -223,7 +222,6 @@ LIMIT
223222
Sideload: nil,
224223
StateLoader: rsl,
225224
SyncWaiter: swl,
226-
EntryCache: raftentry.NewCache(1024),
227225
Settings: st,
228226
}
229227

pkg/kv/kvserver/logstore/logstore.go

+1-15
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ type LogStore struct {
132132
Sideload SideloadStorage
133133
StateLoader StateLoader // used only for writes under raftMu
134134
SyncWaiter *SyncWaiterLoop
135-
EntryCache *raftentry.Cache
136135
Settings *cluster.Settings
137136

138137
DisableSyncLogWriteToss bool // for testing only
@@ -324,19 +323,6 @@ func (s *LogStore) storeEntriesAndCommitBatch(
324323
}
325324
}
326325

327-
// Update raft log entry cache. We clear any older, uncommitted log entries
328-
// and cache the latest ones.
329-
//
330-
// In the blocking log sync case, these entries are already durable. In the
331-
// non-blocking case, these entries have been written to the pebble engine (so
332-
// reads of the engine will see them), but they are not yet be durable. This
333-
// means that the entry cache can lead the durable log. This is allowed by
334-
// etcd/raft, which maintains its own tracking of entry durability by
335-
// splitting its log into an unstable portion for entries that are not known
336-
// to be durable and a stable portion for entries that are known to be
337-
// durable.
338-
s.EntryCache.Add(s.RangeID, m.Entries, true /* truncate */)
339-
340326
return state, nil
341327
}
342328

@@ -607,7 +593,7 @@ func LoadEntries(
607593
ctx context.Context,
608594
eng storage.Engine,
609595
rangeID roachpb.RangeID,
610-
eCache *raftentry.Cache,
596+
eCache *raftentry.Cache, // TODO(#145562): this should be the caller's concern
611597
sideloaded SideloadStorage,
612598
lo, hi kvpb.RaftIndex,
613599
maxBytes uint64,

pkg/kv/kvserver/logstore/logstore_bench_test.go

-4
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"math/rand"
1212
"testing"
1313

14-
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry"
1514
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog"
1615
"github.com/cockroachdb/cockroach/pkg/raft"
1716
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
@@ -48,8 +47,6 @@ func BenchmarkLogStore_StoreEntries(b *testing.B) {
4847

4948
func runBenchmarkLogStore_StoreEntries(b *testing.B, bytes int64) {
5049
ctx := context.Background()
51-
const tenMB = 10 * 1 << 20
52-
ec := raftentry.NewCache(tenMB)
5350
const rangeID = 1
5451
eng := storage.NewDefaultInMemForTesting()
5552
defer eng.Close()
@@ -59,7 +56,6 @@ func runBenchmarkLogStore_StoreEntries(b *testing.B, bytes int64) {
5956
RangeID: rangeID,
6057
Engine: eng,
6158
StateLoader: NewStateLoader(rangeID),
62-
EntryCache: ec,
6359
Settings: st,
6460
}
6561

pkg/kv/kvserver/replica_init.go

-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ func newUninitializedReplicaWithoutRaftGroup(
222222
// NOTE: use the same SyncWaiter loop for all raft log writes performed by a
223223
// given range ID, to ensure that callbacks are processed in order.
224224
SyncWaiter: store.syncWaiters[int(rangeID)%len(store.syncWaiters)],
225-
EntryCache: store.raftEntryCache,
226225
Settings: store.cfg.Settings,
227226
DisableSyncLogWriteToss: buildutil.CrdbTestBuild &&
228227
store.TestingKnobs().DisableSyncLogWriteToss,

pkg/kv/kvserver/replica_raftlog.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,13 @@ func (r *Replica) raftTermShMuLocked(index kvpb.RaftIndex) (kvpb.RaftTerm, error
143143
}
144144
// Cache the entry except if it is sideloaded. We don't load/inline the
145145
// sideloaded entries here to keep the term fetching cheap.
146-
// TODO(pav-kv): consider not caching here, after measuring if it makes any
147-
// difference.
146+
//
147+
// TODO(pav-kv): consider not caching here, after measuring or guessing
148+
// whether it makes any difference. It might be harmful to the cache since
149+
// terms tend to be loaded randomly, and the cache assumes moving forward. We
150+
// also now have the term cache in raft which makes optimizations here
151+
// unnecessary. Plus, there is a longer-term better solution not involving
152+
// entry loads: the term cache can be maintained in storage. See #136296.
148153
if typ, _, err := raftlog.EncodingOf(entry); err != nil {
149154
return 0, err
150155
} else if !typ.IsSideloaded() {

pkg/kv/kvserver/replica_raftlog_writes.go

+22-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,28 @@ func (r *replicaLogStorage) appendRaftMuLocked(
5858
) (logstore.RaftState, error) {
5959
state := r.stateRaftMuLocked()
6060
cb := (*replicaSyncCallback)(r)
61-
return r.raftMu.logStorage.StoreEntries(ctx, state, app, cb, stats)
61+
state, err := r.raftMu.logStorage.StoreEntries(ctx, state, app, cb, stats)
62+
if err != nil {
63+
return state, err
64+
}
65+
// Update raft log entry cache. We clear any older, uncommitted log entries
66+
// and cache the latest ones.
67+
//
68+
// In the blocking log sync case, these entries are already durable. In the
69+
// non-blocking case, these entries have been written to storage (so reads of
70+
// the engine will see them), but they are not yet durable. This means that
71+
// the entry cache can lead the durable log. This is allowed by raft, which
72+
// maintains its own tracking of entry durability by splitting its log into an
73+
// unstable portion for entries that are not known to be durable and a stable
74+
// portion for entries that are known to be durable.
75+
//
76+
// TODO(pav-kv): for safety, decompose this update into two steps: before
77+
// writing to storage, truncate the suffix of the cache if entries are
78+
// overwritten; after the write, append new entries to the cache. Currently,
79+
// the cache can contain stale entries while storage is already updated, and
80+
// the only hope is that nobody tries to read it under Replica.mu only.
81+
r.store.raftEntryCache.Add(r.RangeID, app.Entries, true /* truncate */)
82+
return state, nil
6283
}
6384

6485
// updateStateRaftMuLockedMuLocked updates the in-memory reflection of the raft

0 commit comments

Comments
 (0)