# EduRAG/processing_query.py at main · abu24talha/EduRAG · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# from read_chunks import create_embedding
import joblib
import requests
from google import genai  # <--- New library import
from dotenv import load_dotenv
import os
# import time

# 1. Setup Gemini Client
# Load settings from a .env file (if one is found) into the process
# environment first, so the API-key lookup below can see them.
load_dotenv()
gemini_api_key = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=gemini_api_key)

def create_embedding(text_list, timeout=120):
    """Embed a batch of texts with the ``bge-m3`` model served by local Ollama.

    Args:
        text_list: Texts to embed; sent unchanged as the ``input`` field of
            the request body.
        timeout: Seconds to wait for the server before giving up. The
            default is deliberately generous since a local model may be slow
            to answer; pass ``None`` to wait indefinitely.

    Returns:
        The ``embeddings`` field of the JSON response (per the Ollama API
        docs linked below, one vector per input text).

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.RequestException: If the request cannot be completed
            (connection refused, timeout, ...).
    """
    # https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings
    r = requests.post(
        "http://localhost:11434/api/embed",
        json={"model": "bge-m3", "input": text_list},
        timeout=timeout,
    )
    # Surface HTTP errors directly instead of failing later with a
    # confusing KeyError on the missing 'embeddings' field.
    r.raise_for_status()

    return r.json()['embeddings']

def inference(prompt):
    """Send ``prompt`` to Gemini and return the generated text.

    Args:
        prompt: The complete prompt, passed as ``contents`` to the model.

    Returns:
        Always a ``str``. Failures are reported in-band rather than raised:
        an ``"Error connecting to Gemini: ..."`` message when the call
        raises, or a short notice when the model produced no text.
    """
    print("Thinking...")
    try:
        # Model in use: the Gemini 3 Flash preview.
        response = client.models.generate_content(
            model='gemini-3-flash-preview',
            contents=prompt
        )
        text = response.text
    except Exception as e:
        # Broad on purpose: callers print/save the return value and expect
        # a string, never an exception.
        return f"Error connecting to Gemini: {e}"

    # `response.text` is None when the response carries no text parts
    # (e.g. an empty or blocked answer); returning None would make the
    # caller's file write fail with a TypeError.
    if text is None:
        return "Gemini returned no text for this prompt."
    return text

# LIVE STREAMING OF THE TEXT!
# def gemini_inference_stream(prompt):
#     """
#     Streams the response live to the console and returns the complete text.
#     Designed for the google-genai SDK using the free Flash model.
#     """
#     full_response = ""

#     try:
#         # Call the streaming method
#         response_stream = client.models.generate_content_stream(
#             model='gemini-3-flash-preview',
#             contents=prompt
#         )

#         # Process the stream chunk by chunk
#         for chunk in response_stream:
#             # 1. Print the chunk to the screen immediately
#             print(chunk.text, end='', flush=True)

#             # 2. Add the chunk to our complete string
#             full_response += chunk.text

#         print() # Add a final newline when the stream finishes
#         return full_response

#     except Exception as e:
#         # Simple error handling and rate limit retry
#         if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
#             print("\n[Rate limit hit. Waiting 15 seconds...]")
#             time.sleep(15)
#             try:
#                 retry_stream = client.models.generate_content_stream(
#                     model='gemini-3-flash-preview',
#                     contents=prompt
#                 )
#                 for chunk in retry_stream:
#                     print(chunk.text, end='', flush=True)
#                     full_response += chunk.text
#                 print()
#                 return full_response
#             except Exception as retry_e:
#                 return f"\nError on retry: {retry_e}"

#         return f"\nError connecting to Gemini: {e}"

# def inference(prompt):
#     r = requests.post("http://localhost:11434/api/generate", json={
#         "model": "llama3.2",
#         "prompt": prompt,
#         "stream": False
#     })
#     response = r.json()
#     print(response)
#     return response

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load an 'embeddings.joblib' produced by a trusted pipeline.
df = joblib.load('embeddings.joblib')

user_query = input("Type your thoughts! -> ")
# create_embedding takes a batch; we send one text and take its single vector.
query_embedding = create_embedding([user_query])[0]

# Stack the stored per-chunk vectors into one 2-D matrix and score every row
# against the query vector; flatten() turns the (n, 1) result into length n.
similarity = cosine_similarity(np.vstack(df['embedding']), [query_embedding]).flatten()

# Row positions of the `top_result` highest scores, best match first.
top_result = 5
max_index = similarity.argsort()[::-1][0:top_result]

# argsort yields row *positions*, so select with iloc. Using .loc would treat
# them as index labels and pick the wrong rows (or raise KeyError) whenever
# the DataFrame's index is not the default 0..n-1 range.
new_df = df.iloc[max_index]
# print(new_df[['title', 'number', 'text']])

# Serialize only the fields the model is told about, as a JSON array with
# one object per retrieved chunk.
chunk_columns = ['title', 'number', 'start', 'end', 'text']
chunks_json = new_df[chunk_columns].to_json(orient="records")

# Full instruction prompt: retrieved chunks, the user's question, and the
# answering rules.
prompt = f"""
You are an AI assistant helping a teacher explain content from the Sigma Web Development course.

You are given video chunks in JSON format. Each chunk contains:
- title (video title)
- number (video number)
- start (start time in seconds)
- end (end time in seconds)
- text (spoken content)

VIDEO CHUNKS:
{chunks_json}

-----------------------------------------

USER QUESTION:
{user_query}

-----------------------------------------

INSTRUCTIONS:

1. Answer ONLY using the provided video chunks.
2. Identify:
   - Which video(s) contain the answer
   - Exact timestamps (start → end)
   - How much of the topic is covered (brief / partial / detailed)

3. Response format:
   - Mention video title and number
   - Provide timestamps in seconds or mm:ss
   - Briefly describe what is explained in that segment
   - Guide the user clearly on where to watch

4. If multiple relevant chunks exist:
   - Combine them into a clear, structured answer

5. If the question is NOT related to the course or no relevant chunks are found:
   - Respond with:
     "I am designed to answer questions only related to the Sigma Web Development course content."

6. Keep the tone natural and human-like.
7. Do NOT mention JSON, chunks, or internal data.

-----------------------------------------

ANSWER:
"""

# Keep a copy of the exact prompt on disk so it can be inspected afterwards.
with open("prompt.txt", "w", encoding="utf-8") as prompt_file:
    prompt_file.write(prompt)

# # ... [Your existing vector search and prompt construction code] ...

#     print("\n--- Course Assistant Response ---\n")

#     # The function prints to the screen live, and saves the full text to 'answer'
#     answer = gemini_inference_stream(prompt)

#     # Now you can still save it to your file just like before!
#     with open("response.txt", "w", encoding="utf-8") as f:
#         f.write(answer)

# Ask the model, show the answer on the console, then persist it.
response = inference(prompt)
print(response)
# (The earlier Ollama-based inference returned a dict, so the answer had to
# be read from its "response" key at this point.)

with open("response.txt", "w", encoding="utf-8") as response_file:
    response_file.write(response)

# for index, item in new_df.iterrows():
#     print(index, item['title'], item['number'], item['text'], item['start'], item['end'])