Skip to content

Commit c9ea393

Browse files
committed
perf: optimize vector marshal/unmarshal for float32/float64/int32/int64
Add type-specialized fast paths for vector<float>, vector<double>, vector<int>, and vector<bigint> that bypass reflect-based per-element marshaling in favor of direct encoding/binary bulk conversion. Changes in marshal.go: - Type switches in marshalVector()/unmarshalVector() dispatch to dedicated functions for []float32, []float64, []int32, []int64 before falling through to the generic reflect path. - 8 new functions: marshalVectorFloat32, marshalVectorFloat64, unmarshalVectorFloat32, unmarshalVectorFloat64, marshalVectorInt32, marshalVectorInt64, unmarshalVectorInt32, unmarshalVectorInt64. - sync.Pool buffer reuse (vectorBufPool/getVectorBuf/putVectorBuf) for zero-alloc steady state when callers return buffers after the framer copies them. 64KiB cap prevents pool bloat. - Unmarshal fast paths reuse destination slice backing array when capacity is sufficient (zero-alloc steady state on read path). - Generic path preallocation via vectorFixedElemSize() + buf.Grow() for non-fast-path fixed-size types (e.g. UUID, timestamp). Benchmark results for vector<float, 1536> (typical embedding dimension): Marshal (baseline -> optimized): 86.4 us/op -> 3.4 us/op (25x faster) 3081 allocs -> 2 allocs (99.94% fewer) 28632 B/op -> 6172 B/op (78% less memory) Marshal with pool return (steady state): 86.4 us/op -> 1.6 us/op (54x faster) 3081 allocs -> 2 allocs (99.94% fewer) 28632 B/op -> 48 B/op (99.8% less memory) Unmarshal (baseline -> optimized): 60.2 us/op -> 1.5 us/op (41x faster) 2 allocs -> 0 allocs (100% fewer) 6168 B/op -> 0 B/op (100% less memory) Round-trip (baseline -> optimized, pooled): 147.8 us/op -> 3.1 us/op (48x faster) 3083 allocs -> 2 allocs (99.94% fewer) 34800 B/op -> 48 B/op (99.9% less memory) Throughput: 80 MB/s -> 3.5 GB/s (geomean, +2900%) New test files: - marshal_vector_test.go: 58 unit subtests across 13 categories (round-trip, byte-compat, slice-reuse, nil, dimension-mismatch, empty-vector, pointer-to-slice, special-values, pool-concurrency, oversized-not-pooled, fixed-elem-size, generic-prealloc). - vector_bench_test.go: extended with int32/int64 and pooled benchmarks. - tests/bench/bench_vector_public_test.go: public API benchmarks for int32/int64 marshal/unmarshal. Subsumes PR #744 (float fast paths) and PR #745 (generic prealloc). Extends with int32/int64 fast paths and buffer pooling not covered by any existing PR.
1 parent 0952897 commit c9ea393

File tree

4 files changed

+1974
-0
lines changed

4 files changed

+1974
-0
lines changed

marshal.go

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ package gocql
2626

2727
import (
2828
"bytes"
29+
"encoding/binary"
2930
"errors"
3031
"fmt"
3132
"math"
3233
"math/bits"
3334
"reflect"
35+
"sync"
3436
"unsafe"
3537

3638
"github.com/gocql/gocql/serialization/ascii"
@@ -839,6 +841,26 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
839841
return nil, nil
840842
}
841843

844+
// Fast paths for common vector types — avoid reflect entirely.
845+
switch info.SubType.Type() {
846+
case TypeFloat:
847+
if vec, ok := value.([]float32); ok {
848+
return marshalVectorFloat32(info.Dimensions, vec)
849+
}
850+
case TypeDouble:
851+
if vec, ok := value.([]float64); ok {
852+
return marshalVectorFloat64(info.Dimensions, vec)
853+
}
854+
case TypeInt:
855+
if vec, ok := value.([]int32); ok {
856+
return marshalVectorInt32(info.Dimensions, vec)
857+
}
858+
case TypeBigInt:
859+
if vec, ok := value.([]int64); ok {
860+
return marshalVectorInt64(info.Dimensions, vec)
861+
}
862+
}
863+
842864
rv := reflect.ValueOf(value)
843865
t := rv.Type()
844866
k := t.Kind()
@@ -855,6 +877,13 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
855877
}
856878

857879
isLengthType := isVectorVariableLengthType(info.SubType)
880+
if !isLengthType {
881+
if elemSize := vectorFixedElemSize(info.SubType); elemSize > 0 {
882+
if needed := int64(n) * int64(elemSize); needed > 0 && needed <= math.MaxInt32 {
883+
buf.Grow(int(needed))
884+
}
885+
}
886+
}
858887
for i := 0; i < n; i++ {
859888
item, err := Marshal(info.SubType, rv.Index(i).Interface())
860889
if err != nil {
@@ -871,6 +900,26 @@ func marshalVector(info VectorType, value interface{}) ([]byte, error) {
871900
}
872901

873902
func unmarshalVector(info VectorType, data []byte, value interface{}) error {
903+
// Fast paths for common vector types — avoid reflect entirely.
904+
switch info.SubType.Type() {
905+
case TypeFloat:
906+
if dst, ok := value.(*[]float32); ok {
907+
return unmarshalVectorFloat32(info.Dimensions, data, dst)
908+
}
909+
case TypeDouble:
910+
if dst, ok := value.(*[]float64); ok {
911+
return unmarshalVectorFloat64(info.Dimensions, data, dst)
912+
}
913+
case TypeInt:
914+
if dst, ok := value.(*[]int32); ok {
915+
return unmarshalVectorInt32(info.Dimensions, data, dst)
916+
}
917+
case TypeBigInt:
918+
if dst, ok := value.(*[]int64); ok {
919+
return unmarshalVectorInt64(info.Dimensions, data, dst)
920+
}
921+
}
922+
874923
rv := reflect.ValueOf(value)
875924
if rv.Kind() != reflect.Ptr {
876925
return unmarshalErrorf("can not unmarshal into non-pointer %T", value)
@@ -943,6 +992,212 @@ func unmarshalVector(info VectorType, data []byte, value interface{}) error {
943992
// isVectorVariableLengthType determines if a type requires explicit length serialization within a vector.
944993
// Variable-length types need their length encoded before the actual data to allow proper deserialization.
945994
// Fixed-length types, on the other hand, don't require this kind of length prefix.
995+
996+
// vectorBufPool pools []byte buffers used by vector marshal fast paths.
997+
// Buffers are returned to the pool by putVectorBuf after the framer copies them.
998+
//
999+
// NOTE: putVectorBuf is currently exercised only by benchmarks/tests.
1000+
// Wiring it into the connection write path (so production callers return
1001+
// buffers after the framer copies them) is planned for a follow-up change.
1002+
var vectorBufPool = sync.Pool{}
1003+
1004+
func getVectorBuf(size int) []byte {
1005+
if v := vectorBufPool.Get(); v != nil {
1006+
if buf, ok := v.([]byte); ok {
1007+
if cap(buf) >= size {
1008+
return buf[:size]
1009+
}
1010+
// Undersized buffer: return it so smaller vectors can reuse it.
1011+
vectorBufPool.Put(buf) //nolint:staticcheck // SA6002: []byte is a value type; boxing cost is acceptable for pool reuse
1012+
}
1013+
}
1014+
return make([]byte, size)
1015+
}
1016+
1017+
func putVectorBuf(buf []byte) {
1018+
if buf == nil || cap(buf) > 65536 {
1019+
return
1020+
}
1021+
vectorBufPool.Put(buf) //nolint:staticcheck // SA6002: []byte is a value type; boxing cost is acceptable for pool reuse
1022+
}
1023+
1024+
// marshalVectorFloat32 encodes a float32 slice as a contiguous big-endian
1025+
// IEEE 754 vector. Uses a pooled buffer for zero-alloc steady state.
1026+
func marshalVectorFloat32(dim int, vec []float32) ([]byte, error) {
1027+
if vec == nil {
1028+
return nil, nil
1029+
}
1030+
if len(vec) != dim {
1031+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1032+
}
1033+
buf := getVectorBuf(dim * 4)
1034+
for i, v := range vec {
1035+
binary.BigEndian.PutUint32(buf[i*4:], math.Float32bits(v))
1036+
}
1037+
return buf, nil
1038+
}
1039+
1040+
// marshalVectorFloat64 encodes a float64 slice as a contiguous big-endian
1041+
// IEEE 754 vector. Uses a pooled buffer for zero-alloc steady state.
1042+
func marshalVectorFloat64(dim int, vec []float64) ([]byte, error) {
1043+
if vec == nil {
1044+
return nil, nil
1045+
}
1046+
if len(vec) != dim {
1047+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1048+
}
1049+
buf := getVectorBuf(dim * 8)
1050+
for i, v := range vec {
1051+
binary.BigEndian.PutUint64(buf[i*8:], math.Float64bits(v))
1052+
}
1053+
return buf, nil
1054+
}
1055+
1056+
// unmarshalVectorFloat32 decodes contiguous big-endian IEEE 754 floats.
1057+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1058+
func unmarshalVectorFloat32(dim int, data []byte, dst *[]float32) error {
1059+
if data == nil {
1060+
*dst = nil
1061+
return nil
1062+
}
1063+
expected := dim * 4
1064+
if len(data) != expected {
1065+
return unmarshalErrorf("unmarshal vector<float, %d>: expected %d bytes, got %d", dim, expected, len(data))
1066+
}
1067+
vec := *dst
1068+
if cap(vec) >= dim {
1069+
vec = vec[:dim]
1070+
} else {
1071+
vec = make([]float32, dim)
1072+
}
1073+
for i := 0; i < dim; i++ {
1074+
vec[i] = math.Float32frombits(binary.BigEndian.Uint32(data[i*4:]))
1075+
}
1076+
*dst = vec
1077+
return nil
1078+
}
1079+
1080+
// unmarshalVectorFloat64 decodes contiguous big-endian IEEE 754 doubles.
1081+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1082+
func unmarshalVectorFloat64(dim int, data []byte, dst *[]float64) error {
1083+
if data == nil {
1084+
*dst = nil
1085+
return nil
1086+
}
1087+
expected := dim * 8
1088+
if len(data) != expected {
1089+
return unmarshalErrorf("unmarshal vector<double, %d>: expected %d bytes, got %d", dim, expected, len(data))
1090+
}
1091+
vec := *dst
1092+
if cap(vec) >= dim {
1093+
vec = vec[:dim]
1094+
} else {
1095+
vec = make([]float64, dim)
1096+
}
1097+
for i := 0; i < dim; i++ {
1098+
vec[i] = math.Float64frombits(binary.BigEndian.Uint64(data[i*8:]))
1099+
}
1100+
*dst = vec
1101+
return nil
1102+
}
1103+
1104+
// marshalVectorInt32 encodes an int32 slice as a contiguous big-endian
1105+
// vector (CQL int = 4 bytes). Uses a pooled buffer for zero-alloc steady state.
1106+
func marshalVectorInt32(dim int, vec []int32) ([]byte, error) {
1107+
if vec == nil {
1108+
return nil, nil
1109+
}
1110+
if len(vec) != dim {
1111+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1112+
}
1113+
buf := getVectorBuf(dim * 4)
1114+
for i, v := range vec {
1115+
binary.BigEndian.PutUint32(buf[i*4:], uint32(v))
1116+
}
1117+
return buf, nil
1118+
}
1119+
1120+
// marshalVectorInt64 encodes an int64 slice as a contiguous big-endian
1121+
// vector (CQL bigint = 8 bytes). Uses a pooled buffer for zero-alloc steady state.
1122+
func marshalVectorInt64(dim int, vec []int64) ([]byte, error) {
1123+
if vec == nil {
1124+
return nil, nil
1125+
}
1126+
if len(vec) != dim {
1127+
return nil, marshalErrorf("expected vector with %d dimensions, received %d", dim, len(vec))
1128+
}
1129+
buf := getVectorBuf(dim * 8)
1130+
for i, v := range vec {
1131+
binary.BigEndian.PutUint64(buf[i*8:], uint64(v))
1132+
}
1133+
return buf, nil
1134+
}
1135+
1136+
// unmarshalVectorInt32 decodes contiguous big-endian CQL int (4-byte) values.
1137+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1138+
func unmarshalVectorInt32(dim int, data []byte, dst *[]int32) error {
1139+
if data == nil {
1140+
*dst = nil
1141+
return nil
1142+
}
1143+
expected := dim * 4
1144+
if len(data) != expected {
1145+
return unmarshalErrorf("unmarshal vector<int, %d>: expected %d bytes, got %d", dim, expected, len(data))
1146+
}
1147+
vec := *dst
1148+
if cap(vec) >= dim {
1149+
vec = vec[:dim]
1150+
} else {
1151+
vec = make([]int32, dim)
1152+
}
1153+
for i := 0; i < dim; i++ {
1154+
vec[i] = int32(binary.BigEndian.Uint32(data[i*4:]))
1155+
}
1156+
*dst = vec
1157+
return nil
1158+
}
1159+
1160+
// unmarshalVectorInt64 decodes contiguous big-endian CQL bigint (8-byte) values.
1161+
// Reuses the destination slice's backing array when capacity allows (zero-alloc steady state).
1162+
func unmarshalVectorInt64(dim int, data []byte, dst *[]int64) error {
1163+
if data == nil {
1164+
*dst = nil
1165+
return nil
1166+
}
1167+
expected := dim * 8
1168+
if len(data) != expected {
1169+
return unmarshalErrorf("unmarshal vector<bigint, %d>: expected %d bytes, got %d", dim, expected, len(data))
1170+
}
1171+
vec := *dst
1172+
if cap(vec) >= dim {
1173+
vec = vec[:dim]
1174+
} else {
1175+
vec = make([]int64, dim)
1176+
}
1177+
for i := 0; i < dim; i++ {
1178+
vec[i] = int64(binary.BigEndian.Uint64(data[i*8:]))
1179+
}
1180+
*dst = vec
1181+
return nil
1182+
}
1183+
1184+
// vectorFixedElemSize returns the known wire-format byte size for fixed-length
1185+
// CQL types used as vector elements. Returns 0 for variable-length or unknown types.
1186+
func vectorFixedElemSize(elemType TypeInfo) int {
1187+
switch elemType.Type() {
1188+
case TypeBoolean:
1189+
return 1
1190+
case TypeInt, TypeFloat:
1191+
return 4
1192+
case TypeBigInt, TypeDouble, TypeTimestamp:
1193+
return 8
1194+
case TypeUUID, TypeTimeUUID:
1195+
return 16
1196+
default:
1197+
return 0
1198+
}
1199+
}
1200+
9461201
func isVectorVariableLengthType(elemType TypeInfo) bool {
9471202
switch elemType.Type() {
9481203
case TypeVarchar, TypeAscii, TypeBlob, TypeText,

0 commit comments

Comments
 (0)