Skip to content

Commit 0a6058b

Browse files
committed
perf: optimize batch execution path for reduced allocations
Motivation ---------- The BATCH execution path (executeBatch / writeBatchFrame) in the gocql driver performed several avoidable allocations per batch execution: individual make() calls for each statement's queryValues slice, a separate stmts map[string]string to track prepared-ID-to-statement mappings, sequential preparation of all statements, and unbounded recursive retries on RequestErrUnprepared errors. Changes ------- 1. Concurrent statement preparation (Item 1): Split executeBatch into a retry loop (executeBatch) and single-attempt logic (executeBatchOnce). Unique statements are collected into a map and prepared concurrently via goroutines + sync.WaitGroup. The existing prepareStatement coalesces concurrent calls for the same statement via execIfMissing on the LRU cache, so this is safe. 2. Bulk-allocate queryValues (Item 2): Two-pass approach: first pass computes totalValues across all entries, second pass slices from a single []queryValues allocation. Replaces N individual make([]queryValues, colCount) calls with one. 3. Eliminate stmts map (Item 3): Added sourceStmt field to batchStatment struct (frame.go). On RequestErrUnprepared, a linear scan of req.statements replaces the map lookup. This is acceptable because it only runs on the error recovery path, not the hot path. Added unexported stmt field to RequestErrUnprepared (errors.go) to carry the source statement text from executeBatchOnce back to the retry loop. 4. Reserve(n int) API (Item 4): Added Batch.Reserve(n int) *Batch to session.go, which pre-allocates the Entries slice capacity. Named Reserve (not Size) because Size() already exists and returns len(b.Entries). 5. Bounded retry on RequestErrUnprepared (Item 9): Converted the unbounded recursive executeBatch call to an iterative loop with maxBatchPrepareRetries = 1. If re-preparation fails twice, the error is returned rather than retrying indefinitely. Benchmark results (synthetic, 8 samples, not end-to-end latency) ---------------------------------------------------------------- These benchmarks measure allocation patterns only. They do NOT measure real Cassandra/Scylla round-trip latency or throughput. The numbers below reflect the cost of building batch data structures in isolation. Reserve() — Batch.Query append: entries=10: 2407 B/op 35 allocs -> 1110 B/op 31 allocs (-54% B, -11% allocs) entries=100: 21791 B/op 308 allocs -> 11720 B/op 301 allocs (-46% B, -2% allocs) Bulk queryValues allocation — writeBatchFrame construction: entries=10: 3434 B/op 74 allocs -> 2294 B/op 42 allocs (-33% B, -43% allocs) entries=100: 32319 B/op 704 allocs -> 21668 B/op 402 allocs (-33% B, -43% allocs) Files changed: conn.go, frame.go, errors.go, session.go Files added: batch_bench_test.go
1 parent 0952897 commit 0a6058b

File tree

5 files changed

+332
-41
lines changed

5 files changed

+332
-41
lines changed

batch_bench_test.go

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package gocql
20+
21+
import (
22+
"fmt"
23+
"testing"
24+
)
25+
26+
// BenchmarkBatchQueryAppend measures the cost of appending entries to a Batch
27+
// via the Query() method. This exercises slice growth and BatchEntry allocation.
28+
func BenchmarkBatchQueryAppend(b *testing.B) {
29+
for _, size := range []int{10, 100} {
30+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
31+
b.ReportAllocs()
32+
for i := 0; i < b.N; i++ {
33+
batch := &Batch{
34+
Type: LoggedBatch,
35+
}
36+
for j := 0; j < size; j++ {
37+
batch.Query("INSERT INTO ks.tbl (pk, v) VALUES (?, ?)", j, fmt.Sprintf("val_%d", j))
38+
}
39+
}
40+
})
41+
}
42+
}
43+
44+
// BenchmarkBatchQueryAppendPreallocated measures the cost of appending entries
45+
// to a Batch with a pre-allocated Entries slice, to serve as comparison target
46+
// for the Reserve() optimization.
47+
func BenchmarkBatchQueryAppendPreallocated(b *testing.B) {
48+
for _, size := range []int{10, 100} {
49+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
50+
b.ReportAllocs()
51+
for i := 0; i < b.N; i++ {
52+
batch := (&Batch{
53+
Type: LoggedBatch,
54+
}).Reserve(size)
55+
for j := 0; j < size; j++ {
56+
batch.Query("INSERT INTO ks.tbl (pk, v) VALUES (?, ?)", j, fmt.Sprintf("val_%d", j))
57+
}
58+
}
59+
})
60+
}
61+
}
62+
63+
// BenchmarkBatchBuildWriteFrame measures the cost of building a writeBatchFrame
64+
// from pre-populated batch statements with prepared IDs and queryValues.
65+
// This isolates the allocation patterns in executeBatch's frame-building logic.
66+
func BenchmarkBatchBuildWriteFrame(b *testing.B) {
67+
for _, size := range []int{10, 100} {
68+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
69+
b.ReportAllocs()
70+
71+
// Pre-build mock column types and values
72+
colCount := 2
73+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
74+
75+
for i := 0; i < b.N; i++ {
76+
req := &writeBatchFrame{
77+
typ: LoggedBatch,
78+
statements: make([]batchStatment, size),
79+
consistency: Quorum,
80+
defaultTimestamp: true,
81+
}
82+
83+
stmts := make(map[string]string, size)
84+
85+
// Simulate the allocation pattern from executeBatch
86+
for j := 0; j < size; j++ {
87+
bs := &req.statements[j]
88+
bs.preparedID = []byte(fmt.Sprintf("prepared_%d", j%5))
89+
stmts[string(bs.preparedID)] = fmt.Sprintf("INSERT INTO ks.tbl (pk, v) VALUES (?, ?)")
90+
91+
bs.values = make([]queryValues, colCount)
92+
for k := 0; k < colCount; k++ {
93+
val, _ := Marshal(typ, j+k)
94+
bs.values[k] = queryValues{value: val}
95+
}
96+
}
97+
}
98+
})
99+
}
100+
}
101+
102+
// BenchmarkBatchBuildWriteFrameBulkAlloc measures the cost of building a
103+
// writeBatchFrame using a single bulk allocation for all queryValues and
104+
// sourceStmt on the struct (no separate map). This reflects the optimized
105+
// allocation pattern from Items 2 and 3.
106+
func BenchmarkBatchBuildWriteFrameBulkAlloc(b *testing.B) {
107+
for _, size := range []int{10, 100} {
108+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
109+
b.ReportAllocs()
110+
111+
colCount := 2
112+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
113+
114+
for i := 0; i < b.N; i++ {
115+
req := &writeBatchFrame{
116+
typ: LoggedBatch,
117+
statements: make([]batchStatment, size),
118+
consistency: Quorum,
119+
defaultTimestamp: true,
120+
}
121+
122+
// Bulk-allocate all queryValues in a single slice
123+
allValues := make([]queryValues, size*colCount)
124+
125+
for j := 0; j < size; j++ {
126+
bs := &req.statements[j]
127+
bs.preparedID = []byte(fmt.Sprintf("prepared_%d", j%5))
128+
bs.sourceStmt = "INSERT INTO ks.tbl (pk, v) VALUES (?, ?)"
129+
130+
bs.values = allValues[j*colCount : (j+1)*colCount]
131+
for k := 0; k < colCount; k++ {
132+
val, _ := Marshal(typ, j+k)
133+
bs.values[k] = queryValues{value: val}
134+
}
135+
}
136+
}
137+
})
138+
}
139+
}
140+
141+
// BenchmarkBatchWriteFrameSerialization measures the cost of serializing a
142+
// writeBatchFrame to bytes via the framer.
143+
func BenchmarkBatchWriteFrameSerialization(b *testing.B) {
144+
for _, size := range []int{10, 100} {
145+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
146+
b.ReportAllocs()
147+
148+
colCount := 2
149+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
150+
151+
// Pre-build the frame once
152+
frame := &writeBatchFrame{
153+
typ: LoggedBatch,
154+
statements: make([]batchStatment, size),
155+
consistency: Quorum,
156+
defaultTimestamp: true,
157+
}
158+
159+
for j := 0; j < size; j++ {
160+
bs := &frame.statements[j]
161+
bs.preparedID = []byte(fmt.Sprintf("prepared_%d", j%5))
162+
bs.values = make([]queryValues, colCount)
163+
for k := 0; k < colCount; k++ {
164+
val, _ := Marshal(typ, j+k)
165+
bs.values[k] = queryValues{value: val}
166+
}
167+
}
168+
169+
b.ResetTimer()
170+
171+
for i := 0; i < b.N; i++ {
172+
f := newFramer(nil, protoVersion4)
173+
err := frame.buildFrame(f, 1)
174+
if err != nil {
175+
b.Fatal(err)
176+
}
177+
}
178+
})
179+
}
180+
}

0 commit comments

Comments
 (0)