@@ -85673,8 +85673,9 @@ void blobSpotFree(BlobSpot *pBlobSpot);
85673
85673
85674
85674
/*
85675
85675
* Accessor for node binary format
85676
- * - v1 format is the following:
85677
- * [u64 nRowid] [u16 nEdges] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u64 legacyField] [u64 edgeId]) * nEdges
85676
+ * - default format is the following:
85677
+ * [u64 nRowid] [u16 nEdges] [6 byte padding] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u32 unused] [f32 distance] [u64 edgeId]) * nEdges
85678
+ * Note that the 6-byte padding after nEdges is required to align [node vector] on a word boundary and avoid unaligned reads
85678
85679
* Note, that node vector and edge vector can have different representations (and edge vector can be smaller in size than node vector)
85679
85680
*/
85680
85681
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex);
@@ -85713,9 +85714,11 @@ typedef u8 MetricType;
85713
85714
/*
85714
85715
* 1 - v1 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u64 unused ] [u64 edge rowid] ] ...
85715
85716
* 2 - v2 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u32 unused] [f32 distance] [u64 edge rowid] ] ...
85717
+ * 3 - v3 version; node meta is padded to an 8-byte boundary (its size is rounded up from u64 + u16 to u64 + u64)
85716
85718
*/
85717
85719
#define VECTOR_FORMAT_V1 1
85718
- #define VECTOR_FORMAT_DEFAULT 2
85720
+ #define VECTOR_FORMAT_V2 2
85721
+ #define VECTOR_FORMAT_DEFAULT 3
85719
85722
85720
85723
/* type of the vector index */
85721
85724
#define VECTOR_INDEX_TYPE_PARAM_ID 2
@@ -212727,8 +212730,6 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
212727
212730
*/
212728
212731
#define DISKANN_BLOCK_SIZE_SHIFT 9
212729
212732
212730
- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
212731
- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
212732
212733
212733
212734
typedef struct VectorPair VectorPair;
212734
212735
typedef struct DiskAnnSearchCtx DiskAnnSearchCtx;
@@ -212951,46 +212952,58 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
212951
212952
** Layout specific utilities
212952
212953
**************************************************************************/
212953
212954
212954
- int nodeEdgeOverhead(int nEdgeVectorSize){
212955
- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
212955
+ int nodeMetadataSize(int nFormatVersion){
212956
+ if( nFormatVersion <= VECTOR_FORMAT_V2 ){
212957
+ return (sizeof(u64) + sizeof(u16));
212958
+ }else{
212959
+ return (sizeof(u64) + sizeof(u64));
212960
+ }
212961
+ }
212962
+
212963
+ int edgeMetadataSize(int nFormatVersion){
212964
+ return (sizeof(u64) + sizeof(u64));
212965
+ }
212966
+
212967
/*
** Return the number of bytes a single edge contributes to a node block:
** the edge vector (nEdgeVectorSize bytes) plus the edge metadata record
** for the given format version.
*/
int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
  int nEdgeMeta = edgeMetadataSize(nFormatVersion);
  return nEdgeVectorSize + nEdgeMeta;
}
212957
212970
212958
- int nodeOverhead(int nNodeVectorSize){
212959
- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
212971
/*
** Return the number of bytes of a node block that are not available for
** edges: the node vector (nNodeVectorSize bytes) plus the node metadata
** for the given format version.
*/
int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
  int nNodeMeta = nodeMetadataSize(nFormatVersion);
  return nNodeVectorSize + nNodeMeta;
}
212961
212974
212962
212975
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
212963
- unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
212976
+ unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nFormatVersion, pIndex-> nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nFormatVersion, pIndex->nEdgeVectorSize);
212964
212977
assert( nMaxEdges > 0);
212965
212978
return nMaxEdges;
212966
212979
}
212967
212980
212968
212981
int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex){
212969
212982
unsigned int offset;
212970
212983
unsigned int nMaxEdges = nodeEdgesMaxCount(pIndex);
212971
- offset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212984
+ offset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212972
212985
assert( offset <= pIndex->nBlockSize );
212973
212986
return offset;
212974
212987
}
212975
212988
212976
212989
/*
** Initialize the buffer of pBlobSpot as a fresh node block for nRowid.
**
** The whole buffer is zeroed, the rowid is written little-endian at offset
** zero, and pVector is serialized right after the node metadata.
*/
void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
  int nMetaSize = nodeMetadataSize(pIndex->nFormatVersion);

  assert( nMetaSize + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );

  /* zeroing the buffer also leaves the neighbours count at zero, so it is not written explicitly */
  memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
  writeLE64(pBlobSpot->pBuffer, nRowid);

  vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + nMetaSize, pIndex->nNodeVectorSize);
}
212985
212998
212986
212999
/*
** Initialize pVector through vectorInitStatic() with the index's node
** vector type and dimensions, using the bytes that follow the node
** metadata in the buffer of pBlobSpot.
*/
void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector) {
  int nMetaSize = nodeMetadataSize(pIndex->nFormatVersion);

  assert( nMetaSize + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );

  vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + nMetaSize);
}
212991
213004
212992
213005
u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
212993
- assert( VECTOR_NODE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213006
+ assert( nodeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
212994
213007
212995
213008
return readLE16(pBlobSpot->pBuffer + sizeof(u64));
212996
213009
}
@@ -213000,20 +213013,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
213000
213013
int offset = nodeEdgesMetadataOffset(pIndex);
213001
213014
213002
213015
if( pRowid != NULL ){
213003
- assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213004
- *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
213016
+ assert( offset + (iEdge + 1) * edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213017
+ *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u64));
213005
213018
}
213006
213019
if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
213007
- distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
213020
+ distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u32));
213008
213021
*pDistance = *((float*)&distance);
213009
213022
}
213010
213023
if( pVector != NULL ){
213011
- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213024
+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213012
213025
vectorInitStatic(
213013
213026
pVector,
213014
213027
pIndex->nEdgeVectorType,
213015
213028
pIndex->nVectorDims,
213016
- pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213029
+ pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213017
213030
);
213018
213031
}
213019
213032
}
@@ -213050,11 +213063,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
213050
213063
nEdges++;
213051
213064
}
213052
213065
213053
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213054
- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
213066
+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213067
+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * edgeMetadataSize(pIndex->nFormatVersion) ;
213055
213068
213056
213069
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213057
- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213070
+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213058
213071
213059
213072
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
213060
213073
writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
@@ -213070,19 +213083,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
213070
213083
213071
213084
assert( 0 <= iDelete && iDelete < nEdges );
213072
213085
213073
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213074
- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213075
- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
213076
- lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * VECTOR_EDGE_METADATA_SIZE ;
213086
+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213087
+ lastVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213088
+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * edgeMetadataSize(pIndex->nFormatVersion) ;
213089
+ lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * edgeMetadataSize(pIndex->nFormatVersion) ;
213077
213090
213078
213091
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213079
213092
assert( lastVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213080
- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213081
- assert( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213093
+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213094
+ assert( lastMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213082
213095
213083
213096
if( edgeVectorOffset < lastVectorOffset ){
213084
213097
memmove(pBlobSpot->pBuffer + edgeVectorOffset, pBlobSpot->pBuffer + lastVectorOffset, pIndex->nEdgeVectorSize);
213085
- memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, VECTOR_EDGE_METADATA_SIZE );
213098
+ memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, edgeMetadataSize(pIndex->nFormatVersion) );
213086
213099
}
213087
213100
213088
213101
writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges - 1);
@@ -213168,9 +213181,9 @@ int diskAnnCreateIndex(
213168
213181
if( maxNeighborsParam == 0 ){
213169
213182
// 3 D**(1/2) gives good recall values (90%+)
213170
213183
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
213171
- maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(neighbours, dims)) + 1);
213184
+ maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims))) / nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims)) + 1);
213172
213185
}
213173
- blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(neighbours, dims));
213186
+ blockSizeBytes = nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims));
213174
213187
if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
213175
213188
return SQLITE_ERROR;
213176
213189
}
0 commit comments