Skip to content

Commit cd939a0

Browse files
committed
test: add sample test
1 parent b6197df commit cd939a0

File tree

1 file changed

+94
-1
lines changed

1 file changed

+94
-1
lines changed

main.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,98 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import chromadb
4+
from pathlib import Path
5+
import openai
6+
from dotenv import load_dotenv
7+
8+
# Load environment variables
9+
load_dotenv()
10+
11+
112
def main():
    """Index the project's Python files into Chroma and run a sample query.

    Steps, in order:

    1. Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.
    2. Open a persistent Chroma store at ``./chroma_index`` and recreate the
       ``codebase`` collection from scratch.
    3. Embed every ``*.py`` file under the current directory (skipping
       anything inside a ``.venv`` or ``__pycache__`` directory) with
       ``text-embedding-3-small`` and store it with its source path.
    4. Embed a fixed test question and print up to three nearest documents,
       each truncated to its first 150 characters.

    A failure on a single file is reported and does not stop the run.
    Returns ``None``; all results are reported on stdout.
    """
    print("Starting codebase indexing with OpenAI embeddings...")

    embedding_model = "text-embedding-3-small"

    # Initialize OpenAI client
    openai_api_key = os.getenv("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=openai_api_key)
    print("Initialized OpenAI client")

    # Initialize Chroma client - using PersistentClient for local storage
    chroma_client = chromadb.PersistentClient(path="./chroma_index")
    print("Initialized Chroma client")

    # Recreate the collection so every run starts from an empty index.
    collection_name = "codebase"
    try:
        chroma_client.delete_collection(collection_name)
        print(f"Deleted existing collection: {collection_name}")
    except Exception as exc:
        # Best effort: the collection normally just does not exist yet, but
        # report the reason instead of hiding an unexpected failure.
        print(f"Skipped deleting collection {collection_name}: {exc}")

    collection = chroma_client.create_collection(name=collection_name)
    print(f"Created collection: {collection_name}")

    # Find Python files in the project. Exclusion is decided per path
    # component, so only real ".venv" / "__pycache__" directories are skipped
    # (a substring test would also drop e.g. "my.venv_notes.py").
    project_path = Path(".")
    excluded_dirs = {".venv", "__pycache__"}
    py_files = sorted(
        path
        for path in project_path.rglob("*.py")
        if path.is_file() and excluded_dirs.isdisjoint(path.parts)
    )

    print(f"Found {len(py_files)} Python files in the project")

    # Process each file
    doc_count = 0
    for path in py_files:
        file_path = str(path)
        try:
            # Explicit encoding: the platform default is not UTF-8 everywhere.
            content = path.read_text(encoding="utf-8")

            # Skip empty files
            if not content.strip():
                print(f"Skipping empty file: {file_path}")
                continue

            # Generate embedding using OpenAI
            response = client.embeddings.create(
                input=content, model=embedding_model
            )
            embedding = response.data[0].embedding

            # The relative POSIX path is already unique per file; flattening
            # "/" to "_" would make "a/b.py" and "a_b.py" share one id.
            collection.add(
                ids=[path.as_posix()],
                documents=[content],
                embeddings=[embedding],
                metadatas=[{"source": file_path}],
            )
            doc_count += 1

            print(f"Indexed {file_path}")
        except Exception as e:
            # Per-file boundary: one unreadable or rejected file must not
            # abort indexing of the remaining files.
            print(f"Error processing {file_path}: {e}")

    indexed_total = collection.count()
    print(f"Indexing complete. Collection now has {indexed_total} documents")
    print(f"Total documents indexed: {doc_count}")

    # Nothing to search: skip the embedding call and the query entirely.
    if indexed_total == 0:
        print("No documents were indexed; skipping test query")
        return

    # Test a simple query
    query_text = "How does the indexer handle JSON files?"
    query_response = client.embeddings.create(
        input=query_text, model=embedding_model
    )
    query_embedding = query_response.data[0].embedding

    # Never ask for more neighbours than the collection holds.
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=min(3, indexed_total),
    )

    print("\nTest Query Results:")
    print(f"Query: '{query_text}'")
    matches = zip(results["documents"][0], results["metadatas"][0])
    for rank, (doc, metadata) in enumerate(matches, start=1):
        print(f"\nResult {rank} from {metadata['source']}:")
        print(f"{doc[:150]}...")
396

497

598
if __name__ == "__main__":

0 commit comments

Comments
 (0)