-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedding-storage.service.ts
More file actions
120 lines (104 loc) · 2.9 KB
/
embedding-storage.service.ts
File metadata and controls
120 lines (104 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/**
* Embedding Storage Service
* Handles persistence of embedding metadata to PostgreSQL
*/
import { ChunkEmbedding } from '../types';
import { logger } from '../utils/logger';
import { db } from '../database/connection';
/**
 * Persists and queries embedding metadata rows in the `chunk_embeddings`
 * table through the shared `db` connection helper.
 *
 * Rows are keyed by `(chunk_id, model)` (see the `ON CONFLICT` target used by
 * {@link EmbeddingStorageService.saveEmbeddings}), so a single chunk may have
 * one metadata row per embedding model.
 */
export class EmbeddingStorageService {
  /**
   * Upserts metadata for each of the given embeddings inside one
   * `db.transaction` call.
   *
   * Rows are written one at a time, in input order. If the input contains the
   * same `(chunkId, model)` pair more than once, the later entry overwrites
   * the earlier one.
   *
   * @param embeddings - Embeddings whose metadata should be stored. An empty
   *   array logs a warning and returns without touching the database.
   * @throws Any error raised by `db.transaction` or `client.query` propagates
   *   to the caller (nothing is caught here). NOTE(review): rollback behaviour
   *   is presumably provided by `db.transaction` — confirm.
   */
  async saveEmbeddings(embeddings: ChunkEmbedding[]): Promise<void> {
    if (embeddings.length === 0) {
      logger.warn('No embeddings to save');
      return;
    }

    logger.info('Saving embedding metadata to database', {
      count: embeddings.length,
    });

    const upsertSql = `INSERT INTO chunk_embeddings (chunk_id, model, dimensions, generated_at)
      VALUES ($1, $2, $3, $4)
      ON CONFLICT (chunk_id, model)
      DO UPDATE SET
      dimensions = EXCLUDED.dimensions,
      generated_at = EXCLUDED.generated_at`;

    await db.transaction(async (client) => {
      // Deliberately sequential: every statement shares the single
      // transactional client, and a per-row upsert tolerates duplicate
      // (chunk_id, model) pairs in the input, which one multi-row
      // INSERT ... ON CONFLICT DO UPDATE would reject.
      for (const embedding of embeddings) {
        await client.query(upsertSql, [
          embedding.chunkId,
          embedding.model,
          embedding.dimensions,
          embedding.generatedAt,
        ]);
      }
    });

    logger.info('Embedding metadata saved successfully', {
      count: embeddings.length,
    });
  }

  /**
   * Looks up embedding metadata for a chunk.
   *
   * Because a chunk can have one row per model, the most recently generated
   * row is returned (ties broken by model name) so that repeated calls yield
   * the same result.
   *
   * @param chunkId - Identifier matched against `chunk_embeddings.chunk_id`.
   * @returns The metadata of the newest matching row, or `null` when the chunk
   *   has no rows.
   */
  async getEmbeddingByChunk(chunkId: string): Promise<{
    model: string;
    dimensions: number;
    generatedAt: Date;
  } | null> {
    // LIMIT without ORDER BY lets PostgreSQL return an arbitrary matching row;
    // the explicit ordering makes the choice deterministic.
    const result = await db.query(
      `SELECT model, dimensions, generated_at
      FROM chunk_embeddings
      WHERE chunk_id = $1
      ORDER BY generated_at DESC, model ASC
      LIMIT 1`,
      [chunkId]
    );

    const row = result.rows[0];
    if (!row) {
      return null;
    }

    return {
      model: row.model,
      dimensions: row.dimensions,
      generatedAt: row.generated_at,
    };
  }

  /**
   * Reports whether at least one metadata row exists for the chunk,
   * regardless of model.
   *
   * @param chunkId - Identifier matched against `chunk_embeddings.chunk_id`.
   * @returns `true` when a row exists, otherwise `false`.
   */
  async hasEmbedding(chunkId: string): Promise<boolean> {
    const result = await db.query(
      'SELECT EXISTS(SELECT 1 FROM chunk_embeddings WHERE chunk_id = $1) as exists',
      [chunkId]
    );

    // Coerce so the declared boolean return type holds even if the row value
    // is missing or not already a boolean.
    return Boolean(result.rows[0]?.exists);
  }

  /**
   * Deletes every metadata row (all models) belonging to the given chunks.
   *
   * The `count` in the log entries is the number of chunk IDs requested, not
   * the number of rows actually removed.
   *
   * @param chunkIds - Chunk identifiers to purge. An empty array is a no-op
   *   and issues no query.
   */
  async deleteEmbeddings(chunkIds: string[]): Promise<void> {
    if (chunkIds.length === 0) {
      return;
    }

    logger.info('Deleting embedding metadata', { count: chunkIds.length });

    // The whole ID list is bound as a single array parameter for ANY($1).
    await db.query(
      'DELETE FROM chunk_embeddings WHERE chunk_id = ANY($1)',
      [chunkIds]
    );

    logger.info('Embedding metadata deleted', { count: chunkIds.length });
  }

  /**
   * Counts the embedding metadata rows attached to a document's chunks by
   * joining `chunk_embeddings` to `text_chunks`.
   *
   * A chunk embedded with several models contributes one row per model.
   *
   * @param documentId - Identifier matched against `text_chunks.document_id`.
   * @returns The number of matching rows.
   */
  async getEmbeddingCountForDocument(documentId: string): Promise<number> {
    const result = await db.query(
      `SELECT COUNT(*) as count
      FROM chunk_embeddings ce
      JOIN text_chunks tc ON ce.chunk_id = tc.id
      WHERE tc.document_id = $1`,
      [documentId]
    );

    // NOTE(review): the count is presumably delivered as a string by the
    // driver (hence the parse) — confirm against the `db` helper.
    return Number.parseInt(result.rows[0].count, 10);
  }
}