Skip to content

Commit 4792bf8

Browse files
committed
added websearch feature
Signed-off-by: lochanpaudel <lochanpaudel10@gmail.com>
1 parent ddfdffc commit 4792bf8

File tree

3 files changed

+410
-3
lines changed

3 files changed

+410
-3
lines changed

src/mvt/pages/chatbot.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
from menu import menu_with_redirect
55
from chat_history import init_db, save_message, get_messages
66
from query_rewriting import query_rewriting_llm
7-
from database import create_connection, create_all_tables, insert_response, insert_document, link_document_response, get_document_by_source, get_user
7+
from web_utils import Apputils
88
import json
9+
from database import create_connection, create_all_tables, insert_response, insert_document, link_document_response, get_user
910

1011
def save_feedback(username, msg_idx, feedback_type, response_snippet, reason=None):
1112
import datetime
@@ -43,7 +44,7 @@ def save_feedback(username, msg_idx, feedback_type, response_snippet, reason=Non
4344

4445
try:
4546
rag_chain = get_ragchain(filter)
46-
except FileNotFoundError as e:
47+
except FileNotFoundError:
4748
st.error("⚠️ Knowledge base not initialized!")
4849
st.info("""
4950
The AI FAQ system needs to be set up first. Please:
@@ -153,7 +154,40 @@ def save_feedback(username, msg_idx, feedback_type, response_snippet, reason=Non
153154
with st.spinner("Thinking..."):
154155
# Use rewritten query or original prompt based on config
155156
query = query_rewriting_llm(prompt) if config_data.get("use_query_rewriting", True) else prompt
156-
response = rag_chain.invoke({"input": query})
157+
158+
# Try a lightweight function-calling step to decide if a web-search is needed.
159+
# If the LLM requests a web-search function, execute it and re-run the RAG chain with the results.
160+
try:
161+
func_messages = [
162+
{"role": "system", "content": "Decide whether a web search is required to better answer the user's query. If so, call an appropriate web search function with the best query string."},
163+
{"role": "user", "content": query}
164+
]
165+
func_resp = Apputils.ask_llm_function_caller(
166+
gpt_model=config_data.get('model_name'),
167+
temperature=config_data.get('temperature', 0.0),
168+
messages=func_messages,
169+
function_json_list=Apputils.wrap_functions()
170+
)
171+
172+
# If model requested a tool call, execute it and augment the query
173+
if hasattr(func_resp.choices[0].message, 'tool_calls') and func_resp.choices[0].message.tool_calls:
174+
web_results = Apputils.execute_json_function(func_resp)
175+
# Format web results into a short text blob for the RAG chain
176+
def _fmt(r):
177+
title = r.get('title') or r.get('text') or ''
178+
url = r.get('href') or r.get('url') or r.get('link') or ''
179+
snippet = r.get('body') or r.get('snippet') or r.get('text') or r.get('description') or ''
180+
return f"- {title}\n {url}\n {snippet}\n"
181+
182+
web_text = "\n".join([_fmt(r) for r in web_results]) if isinstance(web_results, list) else str(web_results)
183+
augmented_query = f"{query}\n\nWeb search results (from live web):\n{web_text}\nPlease incorporate these results into your answer and prioritize them along with the knowledge base."
184+
response = rag_chain.invoke({"input": augmented_query})
185+
else:
186+
response = rag_chain.invoke({"input": query})
187+
except Exception as e:
188+
# On any error in the function-call step, fall back to the normal RAG invocation
189+
print(f"Web-search function-calling step failed: {e}")
190+
response = rag_chain.invoke({"input": query})
157191

158192
# Save response to database instead of text file
159193
conn = create_connection()

src/mvt/web_search.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
from duckduckgo_search import DDGS
2+
# The DDGS and AsyncDDGS classes are used to retrieve search results from DuckDuckGo.com.
3+
from typing import List, Optional
4+
5+
6+
class WebSearch:
    """Static helpers that run DuckDuckGo queries through `DDGS`.

    Each method opens a short-lived `DDGS` session, issues a single query and
    returns the results materialised as a list.
    """

    @staticmethod
    def retrieve_web_search_results(query: str, max_results: Optional[int] = 5) -> List:
        """
        Retrieve search results from duckduckgo.com.

        Args:
            query (str): The search query to retrieve results for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.text` for the query.
        """
        with DDGS() as ddgs:
            return list(ddgs.text(query, max_results=max_results))

    @staticmethod
    def web_search_text(query: str, max_results: Optional[int] = 5) -> List:
        """
        Search for text on duckduckgo.com.

        The search is issued with region 'wt-wt', safesearch 'off' and
        timelimit 'y'.

        Args:
            query (str): The text to search for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.text` for the query.
        """
        with DDGS() as ddgs:
            return list(ddgs.text(
                query, region='wt-wt', safesearch='off', timelimit='y', max_results=max_results))

    @staticmethod
    def web_search_pdf(query: str, max_results: Optional[int] = 5) -> List:
        """
        Search for PDF files on duckduckgo.com.

        Args:
            query (str): The text to search for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.text` for the PDF-restricted query.
        """
        # DuckDuckGo restricts results by file type with the `filetype:` operator;
        # a bare ":pdf" suffix is treated as plain query text and does not filter.
        pdf_query = f'{query} filetype:pdf'
        with DDGS() as ddgs:
            return list(ddgs.text(
                pdf_query, region='wt-wt', safesearch='off', timelimit='y', max_results=max_results))

    @staticmethod
    def get_instant_web_answer(query: str) -> List:
        """
        Retrieve instant answers from DuckDuckGo.com.

        Args:
            query (str): The text to search for.

        Returns:
            List of the instant answers yielded by `DDGS.answers`.
        """
        with DDGS() as ddgs:
            return list(ddgs.answers(query))

    @staticmethod
    def web_search_image(keywords: str, max_results: Optional[int] = 5) -> List:
        """
        Search for images on DuckDuckGo.com.

        The search uses region "us-en" with safesearch "on"; the size, color,
        type, layout and license filters are all left unset (None).

        Args:
            keywords (str): The keywords to search for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.images`.
        """
        with DDGS() as ddgs:
            return list(ddgs.images(
                keywords,
                region="us-en",
                safesearch="on",
                size=None,
                color=None,
                type_image=None,
                layout=None,
                license_image=None,
                max_results=max_results,
            ))

    @staticmethod
    def web_search_video(keywords: str, max_results: Optional[int] = 5) -> List:
        """
        Search for videos on DuckDuckGo.com.

        The search uses region "wt-wt", safesearch "off", timelimit "w",
        resolution "high" and duration "medium".

        Args:
            keywords (str): The keywords to search for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.videos`.
        """
        with DDGS() as ddgs:
            return list(ddgs.videos(
                keywords,
                region="wt-wt",
                safesearch="off",
                timelimit="w",
                resolution="high",
                duration="medium",
                max_results=max_results,
            ))

    @staticmethod
    def web_search_news(keywords: str, max_results: Optional[int] = 5) -> List:
        """
        Search for news articles on DuckDuckGo.com.

        The search uses region "wt-wt", safesearch "off" and timelimit "m".

        Args:
            keywords (str): The keywords to search for.
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.news`.
        """
        with DDGS() as ddgs:
            return list(ddgs.news(
                keywords,
                region="wt-wt",
                safesearch="off",
                timelimit="m",
                max_results=max_results,
            ))

    @staticmethod
    def web_search_map(query: str, place: str = "Ottawa", max_results: Optional[int] = 5) -> List:
        """
        Search for maps on DuckDuckGo.com.

        Args:
            query (str): The text to search for.
            place (str): The location to search for maps of (default "Ottawa").
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.maps`.
        """
        with DDGS() as ddgs:
            return list(ddgs.maps(query, place=place, max_results=max_results))

    @staticmethod
    def give_web_search_suggestion(query: str) -> List:
        """
        Retrieve search suggestions from DuckDuckGo.com.

        Args:
            query (str): The text to retrieve suggestions for.

        Returns:
            List of the suggestions yielded by `DDGS.suggestions`.
        """
        with DDGS() as ddgs:
            return list(ddgs.suggestions(query))

    @staticmethod
    def user_proxy_for_text_web_search(query: str, timeout: Optional[int] = 20, max_results: Optional[int] = 5) -> List:
        """
        Search for text on DuckDuckGo.com through a SOCKS5 proxy.

        Args:
            query (str): The text to search for.
            timeout Optional[int]: The timeout passed to `DDGS` (default 20).
            max_results Optional[int]: The maximum number of search results to retrieve (default 5).

        Returns:
            List of the results yielded by `DDGS.text` for the query.
        """
        # NOTE(review): the proxy endpoint is hard-coded; port 9150 is presumably a
        # local Tor Browser SOCKS listener - confirm. Also verify the `proxies`
        # keyword against the pinned duckduckgo_search version (later releases
        # appear to name it `proxy`).
        with DDGS(proxies="socks5://localhost:9150", timeout=timeout) as ddgs:
            return list(ddgs.text(query, max_results=max_results))
203+
204+
if __name__ == "__main__":
    # Manual smoke test: run one live text search and dump the raw result list.
    print(WebSearch.retrieve_web_search_results(query="Who win the ipl 2025?"))

0 commit comments

Comments
 (0)