Skip to content

Commit be36aa7

Browse files
committed
perf: optimize batch execution path for reduced allocations
1 parent d15f511 commit be36aa7

File tree

3 files changed

+374
-43
lines changed

3 files changed

+374
-43
lines changed

batch_bench_test.go

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package gocql
20+
21+
import (
22+
"fmt"
23+
"strconv"
24+
"testing"
25+
)
26+
27+
// benchSink prevents the compiler from eliminating allocations via dead-code
// elimination. Each benchmark assigns its final result here so the optimizer
// must treat the benchmarked work as observable and cannot discard the loop.
var benchSink interface{}
30+
31+
// BenchmarkBatchQueryAppend measures the cost of appending entries to a Batch
32+
// via the Query() method. This exercises slice growth and BatchEntry allocation.
33+
func BenchmarkBatchQueryAppend(b *testing.B) {
34+
for _, size := range []int{10, 100} {
35+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
36+
b.ReportAllocs()
37+
// Pre-compute value strings so fmt.Sprintf doesn't dominate allocations.
38+
vals := make([]string, size)
39+
for j := 0; j < size; j++ {
40+
vals[j] = "val_" + strconv.Itoa(j)
41+
}
42+
var batch *Batch
43+
for i := 0; i < b.N; i++ {
44+
batch = &Batch{
45+
Type: LoggedBatch,
46+
}
47+
for j := 0; j < size; j++ {
48+
batch.Query("INSERT INTO ks.tbl (pk, v) VALUES (?, ?)", j, vals[j])
49+
}
50+
}
51+
benchSink = batch
52+
})
53+
}
54+
}
55+
56+
// BenchmarkBatchQueryAppendPreallocated measures the cost of appending entries
57+
// to a Batch with a pre-allocated Entries slice, to serve as comparison target
58+
// for the Reserve() optimization.
59+
func BenchmarkBatchQueryAppendPreallocated(b *testing.B) {
60+
for _, size := range []int{10, 100} {
61+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
62+
b.ReportAllocs()
63+
// Pre-compute value strings so fmt.Sprintf doesn't dominate allocations.
64+
vals := make([]string, size)
65+
for j := 0; j < size; j++ {
66+
vals[j] = "val_" + strconv.Itoa(j)
67+
}
68+
var batch *Batch
69+
for i := 0; i < b.N; i++ {
70+
batch = (&Batch{
71+
Type: LoggedBatch,
72+
}).Reserve(size)
73+
for j := 0; j < size; j++ {
74+
batch.Query("INSERT INTO ks.tbl (pk, v) VALUES (?, ?)", j, vals[j])
75+
}
76+
}
77+
benchSink = batch
78+
})
79+
}
80+
}
81+
82+
// BenchmarkBatchBuildWriteFrame measures the cost of building a writeBatchFrame
83+
// from pre-populated batch statements with prepared IDs and queryValues.
84+
// This isolates the allocation patterns in executeBatch's frame-building logic.
85+
func BenchmarkBatchBuildWriteFrame(b *testing.B) {
86+
for _, size := range []int{10, 100} {
87+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
88+
b.ReportAllocs()
89+
90+
colCount := 2
91+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
92+
93+
// Pre-compute prepared IDs and marshaled values outside the benchmark loop
94+
// so fmt.Sprintf and Marshal don't pollute allocation measurements.
95+
prepIDs := make([][]byte, size)
96+
marshaledVals := make([][]byte, size*colCount)
97+
for j := 0; j < size; j++ {
98+
prepIDs[j] = []byte("prepared_" + strconv.Itoa(j%5))
99+
for k := 0; k < colCount; k++ {
100+
val, err := Marshal(typ, j+k)
101+
if err != nil {
102+
b.Fatalf("Marshal(%d): %v", j+k, err)
103+
}
104+
marshaledVals[j*colCount+k] = val
105+
}
106+
}
107+
108+
b.ResetTimer()
109+
110+
var req *writeBatchFrame
111+
for i := 0; i < b.N; i++ {
112+
req = &writeBatchFrame{
113+
typ: LoggedBatch,
114+
statements: make([]batchStatment, size),
115+
consistency: Quorum,
116+
defaultTimestamp: true,
117+
}
118+
119+
stmts := make(map[string]string, size)
120+
121+
// Simulate the per-statement allocation pattern from executeBatch
122+
for j := 0; j < size; j++ {
123+
bs := &req.statements[j]
124+
bs.preparedID = prepIDs[j]
125+
stmts[string(bs.preparedID)] = "INSERT INTO ks.tbl (pk, v) VALUES (?, ?)"
126+
127+
bs.values = make([]queryValues, colCount)
128+
for k := 0; k < colCount; k++ {
129+
bs.values[k] = queryValues{value: marshaledVals[j*colCount+k]}
130+
}
131+
}
132+
// Prevent the compiler from eliminating the stmts allocation.
133+
benchSink = stmts
134+
}
135+
benchSink = req
136+
})
137+
}
138+
}
139+
140+
// BenchmarkBatchBuildWriteFrameBulkAlloc measures the cost of building a
141+
// writeBatchFrame using a single bulk allocation for all queryValues.
142+
// This reflects the optimized allocation pattern that replaces per-statement
143+
// make([]queryValues, ...) calls with a single contiguous slice.
144+
func BenchmarkBatchBuildWriteFrameBulkAlloc(b *testing.B) {
145+
for _, size := range []int{10, 100} {
146+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
147+
b.ReportAllocs()
148+
149+
colCount := 2
150+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
151+
152+
// Pre-compute prepared IDs and marshaled values outside the benchmark loop.
153+
prepIDs := make([][]byte, size)
154+
marshaledVals := make([][]byte, size*colCount)
155+
for j := 0; j < size; j++ {
156+
prepIDs[j] = []byte("prepared_" + strconv.Itoa(j%5))
157+
for k := 0; k < colCount; k++ {
158+
val, err := Marshal(typ, j+k)
159+
if err != nil {
160+
b.Fatalf("Marshal(%d): %v", j+k, err)
161+
}
162+
marshaledVals[j*colCount+k] = val
163+
}
164+
}
165+
166+
b.ResetTimer()
167+
168+
var req *writeBatchFrame
169+
for i := 0; i < b.N; i++ {
170+
req = &writeBatchFrame{
171+
typ: LoggedBatch,
172+
statements: make([]batchStatment, size),
173+
consistency: Quorum,
174+
defaultTimestamp: true,
175+
}
176+
177+
// Bulk-allocate all queryValues in a single slice
178+
allValues := make([]queryValues, size*colCount)
179+
180+
for j := 0; j < size; j++ {
181+
bs := &req.statements[j]
182+
bs.preparedID = prepIDs[j]
183+
184+
bs.values = allValues[j*colCount : (j+1)*colCount]
185+
for k := 0; k < colCount; k++ {
186+
bs.values[k] = queryValues{value: marshaledVals[j*colCount+k]}
187+
}
188+
}
189+
}
190+
benchSink = req
191+
})
192+
}
193+
}
194+
195+
// BenchmarkBatchWriteFrameSerialization measures the cost of serializing a
196+
// writeBatchFrame to bytes via the framer.
197+
func BenchmarkBatchWriteFrameSerialization(b *testing.B) {
198+
for _, size := range []int{10, 100} {
199+
b.Run(fmt.Sprintf("entries=%d", size), func(b *testing.B) {
200+
b.ReportAllocs()
201+
202+
colCount := 2
203+
typ := NativeType{proto: protoVersion4, typ: TypeInt}
204+
205+
// Pre-build the frame once
206+
frame := &writeBatchFrame{
207+
typ: LoggedBatch,
208+
statements: make([]batchStatment, size),
209+
consistency: Quorum,
210+
defaultTimestamp: true,
211+
}
212+
213+
for j := 0; j < size; j++ {
214+
bs := &frame.statements[j]
215+
bs.preparedID = []byte("prepared_" + strconv.Itoa(j%5))
216+
bs.values = make([]queryValues, colCount)
217+
for k := 0; k < colCount; k++ {
218+
val, err := Marshal(typ, j+k)
219+
if err != nil {
220+
b.Fatalf("Marshal(%d): %v", j+k, err)
221+
}
222+
bs.values[k] = queryValues{value: val}
223+
}
224+
}
225+
226+
b.ResetTimer()
227+
228+
for i := 0; i < b.N; i++ {
229+
f := newFramer(nil, protoVersion4)
230+
err := frame.buildFrame(f, 1)
231+
if err != nil {
232+
b.Fatal(err)
233+
}
234+
}
235+
})
236+
}
237+
}

0 commit comments

Comments
 (0)