-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrag.py
More file actions
98 lines (77 loc) · 2.97 KB
/
Copy pathrag.py
File metadata and controls
98 lines (77 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import logging
from groq import Groq
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from documents import documents
# Module-level setup: everything below runs once, at import time, and binds
# the globals (`logger`, `model`, `groq_client`, `document_vectors`) that
# search() and generate_answer() rely on. Statement order matters.
# Load environment variables from the .env file so GROQ_API_KEY is visible
# to os.getenv() further down.
load_dotenv()
# Set up INFO-level logging so startup progress is visible
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load the sentence-embedding model used for both documents and questions
logger.info("Loading embedding model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
logger.info("Embedding model loaded successfully!")
# Set up the Groq client using the GROQ_API_KEY environment variable
# (os.getenv returns None when the variable is not set)
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# Pre-compute document vectors once at startup so that search() only has to
# embed the incoming question on each call.
# Entry i of document_vectors is the embedding of documents[i]["content"];
# search() depends on that alignment when it maps indices back to documents.
# NOTE(review): assumes every entry of `documents` is a dict with "title" and
# "content" keys - confirm against documents.py.
document_texts = [doc["content"] for doc in documents]
document_vectors = model.encode(document_texts)
logger.info(f"Indexed {len(documents)} documents successfully!")
def search(question: str, top_k: int = 3) -> list:
    """
    Retrieve the most relevant documents using semantic similarity.

    This is the RETRIEVAL step of RAG:
    - Convert the question to a vector
    - Compare it against all pre-computed document vectors
    - Return the top_k most similar documents

    Args:
        question: Natural-language query to embed and match.
        top_k: Maximum number of documents to return. Values <= 0 yield an
            empty list; values larger than the number of indexed documents
            return every document.

    Returns:
        A list of dicts with the keys "title", "content" and
        "similarity_score" (cosine similarity rounded to 4 decimals),
        ordered from most to least similar.
    """
    # A non-positive top_k would turn the slice below into `[:-n]` (or `[:0]`)
    # and silently return "all but the last n" documents, so bail out early.
    if top_k <= 0:
        return []

    # Convert question to a (1, dim) batch so it can be compared row-wise
    question_vector = model.encode([question])

    # Cosine similarity between the question and every document; [0] selects
    # the single row that belongs to our one question
    similarities = cosine_similarity(question_vector, document_vectors)[0]

    # argsort is ascending, so reverse it before taking the first top_k
    top_indices = np.argsort(similarities)[::-1][:top_k]

    return [
        {
            "title": documents[idx]["title"],
            "content": documents[idx]["content"],
            "similarity_score": round(float(similarities[idx]), 4),
        }
        for idx in top_indices
    ]
def generate_answer(
    question: str,
    context_chunks: list,
    *,
    llm_model: str = "llama-3.1-8b-instant",
    max_tokens: int = 500,
    temperature: float = 0.1,
) -> str:
    """
    Generate a final answer using the Groq LLM.

    This is the GENERATION step of RAG:
    - Take retrieved document chunks as context
    - Inject the context into the prompt (context injection)
    - Send the prompt to the Groq LLM for answer generation
    - Return the model's answer text

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context_chunks: Retrieved documents; each item must provide the keys
            "title" and "content" (the shape returned by search()).
        llm_model: Groq model identifier to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to the API.

    Returns:
        The text of the first completion choice, or an empty string when the
        API returns no message content.
    """
    # Build context from retrieved documents, one "[title]: content" entry
    # per chunk separated by blank lines
    context = "\n\n".join(
        f"[{chunk['title']}]: {chunk['content']}"
        for chunk in context_chunks
    )

    # Prompt engineering — explicit instructions to the LLM.
    # The continuation lines sit at column 0 on purpose: any indentation here
    # would become part of the prompt text.
    prompt = f"""You are an enterprise search assistant for SAP LeanIX.
Your job is to answer questions based ONLY on the provided context.
Do not use any outside knowledge. If the answer is not in the context, say so clearly.
Context:
{context}
Question: {question}
Answer:"""

    # Call the Groq API with the prompt as a single user message
    response = groq_client.chat.completions.create(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # `message.content` is optional in the response schema; normalise a
    # missing value to "" so callers always receive a str as annotated.
    return response.choices[0].message.content or ""