Skip to content

Commit 79698fb

Browse files
committed
perf: optimize vector marshal/unmarshal for float32/float64/int32/int64
Add type-specialized fast paths for vector<float>, vector<double>, vector<int>, and vector<bigint> that bypass reflect-based per-element marshaling in favor of direct encoding/binary bulk conversion. Changes in marshal.go: - Type switches in marshalVector()/unmarshalVector() dispatch to dedicated functions for []float32, []float64, []int32, []int64 before falling through to the generic reflect path. - 8 new functions: marshalVectorFloat32, marshalVectorFloat64, unmarshalVectorFloat32, unmarshalVectorFloat64, marshalVectorInt32, marshalVectorInt64, unmarshalVectorInt32, unmarshalVectorInt64. - sync.Pool buffer reuse (vectorBufPool/getVectorBuf/putVectorBuf) for zero-alloc steady state when callers return buffers after the framer copies them. 64KiB cap prevents pool bloat. - Unmarshal fast paths reuse destination slice backing array when capacity is sufficient (zero-alloc steady state on read path). - Generic path preallocation via vectorFixedElemSize() + buf.Grow() for non-fast-path fixed-size types (e.g. UUID, timestamp). Benchmark results for vector<float, 1536> (typical embedding dimension): Marshal (baseline -> optimized): 86.4 us/op -> 3.4 us/op (25x faster) 3081 allocs -> 2 allocs (99.94% fewer) 28632 B/op -> 6172 B/op (78% less memory) Marshal with pool return (steady state): 86.4 us/op -> 1.6 us/op (54x faster) 3081 allocs -> 2 allocs (99.94% fewer) 28632 B/op -> 48 B/op (99.8% less memory) Unmarshal (baseline -> optimized): 60.2 us/op -> 1.5 us/op (41x faster) 2 allocs -> 0 allocs (100% fewer) 6168 B/op -> 0 B/op (100% less memory) Round-trip (baseline -> optimized, pooled): 147.8 us/op -> 3.1 us/op (48x faster) 3083 allocs -> 2 allocs (99.94% fewer) 34800 B/op -> 48 B/op (99.9% less memory) Throughput: 80 MB/s -> 3.5 GB/s (geomean, +2900%) New test files: - marshal_vector_test.go: 58 unit subtests across 13 categories (round-trip, byte-compat, slice-reuse, nil, dimension-mismatch, empty-vector, pointer-to-slice, special-values, pool-concurrency, oversized-not-pooled, fixed-elem-size, generic-prealloc). - vector_bench_test.go: extended with int32/int64 and pooled benchmarks. - tests/bench/bench_vector_public_test.go: public API benchmarks for int32/int64 marshal/unmarshal. Subsumes PR #744 (float fast paths) and PR #745 (generic prealloc). Extends with int32/int64 fast paths and buffer pooling not covered by any existing PR.
1 parent 0952897 commit 79698fb

4 files changed

Lines changed: 1973 additions & 0 deletions

File tree

marshal.go

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ package gocql
2626

2727
import (
2828
"bytes"
29+
"encoding/binary"
2930
"errors"
3031
"fmt"
3132
"math"
3233
"math/bits"
3334
"reflect"
35+
"sync"
3436
"unsafe"
3537

3638
"github.com/gocql/gocql/serialization/ascii"
@@ -839,6 +841,26 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
839841
return nil, nil
840842
}
841843

844+
// Fast paths for common vector types — avoid reflect entirely.
845+
switch info.SubType.Type() {
846+
case TypeFloat:
847+
if vec, ok := value.([]float32); ok {
848+
return marshalVectorFloat32(info.Dimensions, vec)
849+
}
850+
case TypeDouble:
851+
if vec, ok := value.([]float64); ok {
852+
return marshalVectorFloat64(info.Dimensions, vec)
853+
}
854+
case TypeInt:
855+
if vec, ok := value.([]int32); ok {
856+
return marshalVectorInt32(info.Dimensions, vec)
857+
}
858+
case TypeBigInt:
859+
if vec, ok := value.([]int64); ok {
860+
return marshalVectorInt64(info.Dimensions, vec)
861+
}
862+
}
863+
842864
rv := reflect.ValueOf(value)
843865
t := rv.Type()
844866
k := t.Kind()
@@ -855,6 +877,13 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
855877
}
856878

857879
isLengthType := isVectorVariableLengthType(info.SubType)
880+
if !isLengthType {
881+
if elemSize := vectorFixedElemSize(info.SubType); elemSize > 0 {
882+
if needed := int64(n) * int64(elemSize); needed > 0 && needed <= math.MaxInt32 {
883+
buf.Grow(int(needed))
884+
}
885+
}
886+
}
858887
for i := 0; i < n; i++ {
859888
item, err := Marshal(info.SubType, rv.Index(i).Interface())
860889
if err != nil {
@@ -871,6 +900,26 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
871900
}
872901

873902
func unmarshalVector(info VectorType, data []byte, value interface{}) error {
903+
// Fast paths for common vector types — avoid reflect entirely.
904+
switch info.SubType.Type() {
905+
case TypeFloat:
906+
if dst, ok := value.(*[]float32); ok {
907+
return unmarshalVectorFloat32(info.Dimensions, data, dst)
908+
}
909+
case TypeDouble:
910+
if dst, ok := value.(*[]float64); ok {
911+
return unmarshalVectorFloat64(info.Dimensions, data, dst)
912+
}
913+
case TypeInt:
914+
if dst, ok := value.(*[]int32); ok {
915+
return unmarshalVectorInt32(info.Dimensions, data, dst)
916+
}
917+
case TypeBigInt:
918+
if dst, ok := value.(*[]int64); ok {
919+
return unmarshalVectorInt64(info.Dimensions, data, dst)
920+
}
921+
}
922+
874923
rv := reflect.ValueOf(value)
875924
if rv.Kind() != reflect.Ptr {
876925
return unmarshalErrorf("can not unmarshal into non-pointer %T", value)
@@ -940,6 +989,211 @@ func unmarshalVector(info VectorType, data []byte, value interface{}) error {
940989
return unmarshalErrorf("can not unmarshal %s into %T. Accepted types: *slice, *array, *interface{}.", info, value)
941990
}
942991

992+
// vectorBufPool pools []byte buffers used by vector marshal fast paths.
993+
// Buffers are returned to the pool by putVectorBuf after the framer copies them.
994+
//
995+
// NOTE: putVectorBuf is currently exercised only by benchmarks/tests.
996+
// Wiring it into the connection write path (so production callers return
997+
// buffers after the framer copies them) is planned for a follow-up change.
998+
var vectorBufPool = sync.Pool{}
999+
1000+
func getVectorBuf(size int) []byte {
1001+
if v := vectorBufPool.Get(); v != nil {
1002+
if buf, ok := v.([]byte); ok {
1003+
if cap(buf) >= size {
1004+
return buf[:size]
1005+
}
1006+
// Undersized buffer: return it so smaller vectors can reuse it.
1007+
vectorBufPool.Put(buf) //nolint:staticcheck // SA6002: []byte is a value type; boxing cost is acceptable for pool reuse
1008+
}
1009+
}
1010+
return make([]byte, size)
1011+
}
1012+
1013+
func putVectorBuf(buf []byte) {
1014+
if buf == nil || cap(buf) > 65536 {
1015+
return
1016+
}
1017+
vectorBufPool.Put(buf) //nolint:staticcheck // SA6002: []byte is a value type; boxing cost is acceptable for pool reuse
1018+
}
1019+
1020+
// marshalVectorFloat32 encodes a float32 slice as a contiguous big-endian
1021+
// IEEE 754 vector. Uses a pooled buffer for zero-alloc steady state.
1022+
func marshalVectorFloat32(dim int, vec []float32) ([]byte, error) {
1023+
if vec == nil {
1024+
return nil, nil
1025+
}
1026+
if len(vec) != dim {
1027+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1028+
}
1029+
buf := getVectorBuf(dim * 4)
1030+
for i, v := range vec {
1031+
binary.BigEndian.PutUint32(buf[i*4:], math.Float32bits(v))
1032+
}
1033+
return buf, nil
1034+
}
1035+
1036+
// marshalVectorFloat64 encodes a float64 slice as a contiguous big-endian
1037+
// IEEE 754 vector. Uses a pooled buffer for zero-alloc steady state.
1038+
func marshalVectorFloat64(dim int, vec []float64) ([]byte, error) {
1039+
if vec == nil {
1040+
return nil, nil
1041+
}
1042+
if len(vec) != dim {
1043+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1044+
}
1045+
buf := getVectorBuf(dim * 8)
1046+
for i, v := range vec {
1047+
binary.BigEndian.PutUint64(buf[i*8:], math.Float64bits(v))
1048+
}
1049+
return buf, nil
1050+
}
1051+
1052+
// unmarshalVectorFloat32 decodes contiguous big-endian IEEE 754 floats.
1053+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1054+
func unmarshalVectorFloat32(dim int, data []byte, dst *[]float32) error {
1055+
if data == nil {
1056+
*dst = nil
1057+
return nil
1058+
}
1059+
expected := dim * 4
1060+
if len(data) != expected {
1061+
return unmarshalErrorf("unmarshal vector<float, %d>: expected %d bytes, got %d", dim, expected, len(data))
1062+
}
1063+
vec := *dst
1064+
if cap(vec) >= dim {
1065+
vec = vec[:dim]
1066+
} else {
1067+
vec = make([]float32, dim)
1068+
}
1069+
for i := 0; i < dim; i++ {
1070+
vec[i] = math.Float32frombits(binary.BigEndian.Uint32(data[i*4:]))
1071+
}
1072+
*dst = vec
1073+
return nil
1074+
}
1075+
1076+
// unmarshalVectorFloat64 decodes contiguous big-endian IEEE 754 doubles.
1077+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1078+
func unmarshalVectorFloat64(dim int, data []byte, dst *[]float64) error {
1079+
if data == nil {
1080+
*dst = nil
1081+
return nil
1082+
}
1083+
expected := dim * 8
1084+
if len(data) != expected {
1085+
return unmarshalErrorf("unmarshal vector<double, %d>: expected %d bytes, got %d", dim, expected, len(data))
1086+
}
1087+
vec := *dst
1088+
if cap(vec) >= dim {
1089+
vec = vec[:dim]
1090+
} else {
1091+
vec = make([]float64, dim)
1092+
}
1093+
for i := 0; i < dim; i++ {
1094+
vec[i] = math.Float64frombits(binary.BigEndian.Uint64(data[i*8:]))
1095+
}
1096+
*dst = vec
1097+
return nil
1098+
}
1099+
1100+
// marshalVectorInt32 encodes an int32 slice as a contiguous big-endian
1101+
// vector (CQL int = 4 bytes). Uses a pooled buffer for zero-alloc steady state.
1102+
func marshalVectorInt32(dim int, vec []int32) ([]byte, error) {
1103+
if vec == nil {
1104+
return nil, nil
1105+
}
1106+
if len(vec) != dim {
1107+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1108+
}
1109+
buf := getVectorBuf(dim * 4)
1110+
for i, v := range vec {
1111+
binary.BigEndian.PutUint32(buf[i*4:], uint32(v))
1112+
}
1113+
return buf, nil
1114+
}
1115+
1116+
// marshalVectorInt64 encodes an int64 slice as a contiguous big-endian
1117+
// vector (CQL bigint = 8 bytes). Uses a pooled buffer for zero-alloc steady state.
1118+
func marshalVectorInt64(dim int, vec []int64) ([]byte, error) {
1119+
if vec == nil {
1120+
return nil, nil
1121+
}
1122+
if len(vec) != dim {
1123+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1124+
}
1125+
buf := getVectorBuf(dim * 8)
1126+
for i, v := range vec {
1127+
binary.BigEndian.PutUint64(buf[i*8:], uint64(v))
1128+
}
1129+
return buf, nil
1130+
}
1131+
1132+
// unmarshalVectorInt32 decodes contiguous big-endian CQL int (4-byte) values.
1133+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1134+
func unmarshalVectorInt32(dim int, data []byte, dst *[]int32) error {
1135+
if data == nil {
1136+
*dst = nil
1137+
return nil
1138+
}
1139+
expected := dim * 4
1140+
if len(data) != expected {
1141+
return unmarshalErrorf("unmarshal vector<int, %d>: expected %d bytes, got %d", dim, expected, len(data))
1142+
}
1143+
vec := *dst
1144+
if cap(vec) >= dim {
1145+
vec = vec[:dim]
1146+
} else {
1147+
vec = make([]int32, dim)
1148+
}
1149+
for i := 0; i < dim; i++ {
1150+
vec[i] = int32(binary.BigEndian.Uint32(data[i*4:]))
1151+
}
1152+
*dst = vec
1153+
return nil
1154+
}
1155+
1156+
// unmarshalVectorInt64 decodes contiguous big-endian CQL bigint (8-byte) values.
1157+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1158+
func unmarshalVectorInt64(dim int, data []byte, dst *[]int64) error {
1159+
if data == nil {
1160+
*dst = nil
1161+
return nil
1162+
}
1163+
expected := dim * 8
1164+
if len(data) != expected {
1165+
return unmarshalErrorf("unmarshal vector<bigint, %d>: expected %d bytes, got %d", dim, expected, len(data))
1166+
}
1167+
vec := *dst
1168+
if cap(vec) >= dim {
1169+
vec = vec[:dim]
1170+
} else {
1171+
vec = make([]int64, dim)
1172+
}
1173+
for i := 0; i < dim; i++ {
1174+
vec[i] = int64(binary.BigEndian.Uint64(data[i*8:]))
1175+
}
1176+
*dst = vec
1177+
return nil
1178+
}
1179+
1180+
// vectorFixedElemSize returns the known wire-format byte size for fixed-length
1181+
// CQL types used as vector elements. Returns 0 for variable-length or unknown types.
1182+
func vectorFixedElemSize(elemType TypeInfo) int {
1183+
switch elemType.Type() {
1184+
case TypeBoolean:
1185+
return 1
1186+
case TypeInt, TypeFloat:
1187+
return 4
1188+
case TypeBigInt, TypeDouble, TypeTimestamp:
1189+
return 8
1190+
case TypeUUID, TypeTimeUUID:
1191+
return 16
1192+
default:
1193+
return 0
1194+
}
1195+
}
1196+
9431197
// isVectorVariableLengthType determines if a type requires explicit length serialization within a vector.
9441198
// Variable-length types need their length encoded before the actual data to allow proper deserialization.
9451199
// Fixed-length types, on the other hand, don't require this kind of length prefix.

0 commit comments

Comments
 (0)