generated from amazon-archives/__template_MIT-0
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_vector_data.py
More file actions
36 lines (27 loc) · 1.05 KB
/
load_vector_data.py
File metadata and controls
36 lines (27 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import faiss
import json
from pathlib import Path
from sentence_transformers import SentenceTransformer
def load_to_vector_store(
    data_dir: "str | Path" = "data",
    index_path: "str | Path" = "faqs_vector.index",
    docs_path: "str | Path" = "faqs_docs.json",
    model_name: str = "all-MiniLM-L6-v2",
) -> None:
    """Embed every FAQ text file and persist a FAISS index plus its documents.

    Each ``*.txt`` file directly inside ``data_dir`` is read as UTF-8 and
    embedded as a single document. The embeddings are stored in a flat L2
    FAISS index and the documents are dumped as JSON in the same order, so
    row ``i`` of the index corresponds to entry ``i`` of the JSON list.

    Args:
        data_dir: Directory containing the FAQ ``*.txt`` files.
        index_path: Destination file for the serialized FAISS index.
        docs_path: Destination file for the JSON list of
            ``{"filename", "text"}`` records.
        model_name: Name passed to ``SentenceTransformer`` to select the
            embedding model.

    Raises:
        FileNotFoundError: If ``data_dir`` contains no ``*.txt`` files (this
            also covers a missing directory, for which the glob is empty).
    """
    source_dir = Path(data_dir)

    # Sorted so the document order -- and therefore the index row order -- is
    # deterministic across runs and platforms.
    documents = [
        {
            'filename': faq_file.name,
            'text': faq_file.read_text(encoding='utf-8'),
        }
        for faq_file in sorted(source_dir.glob("*.txt"))
        if faq_file.is_file()
    ]

    # Fail fast, before the expensive model load: with nothing to encode the
    # embedding array has no second dimension to size the index from.
    if not documents:
        raise FileNotFoundError(
            f"No .txt FAQ files found in {source_dir}"
        )

    print(f"Loading {len(documents)} FAQ documents...")

    model = SentenceTransformer(model_name)
    texts = [doc['text'] for doc in documents]
    embeddings = model.encode(texts, show_progress_bar=True)

    # The index dimensionality must match the embedding width of the model.
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    # The embeddings are cast to float32 before being added to the index.
    index.add(embeddings.astype('float32'))

    # str() because the path may have been supplied as a Path object.
    faiss.write_index(index, str(index_path))
    with open(docs_path, "w", encoding="utf-8") as f:
        json.dump(documents, f)

    print(f"✅ Vector store created with {len(documents)} documents")
# Script entry point: build the vector store only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    load_to_vector_store()