Skip to content

Commit bc172e5

Browse files
authored
create FAISS content page with search functionality (#115)
Signed-off-by: sancheet230 <sancheet.p24@medhaviskillsuniversity.edu.in>
1 parent 5de88e2 commit bc172e5

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

src/mvt/menu.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def authenticated_menu():
2222
st.sidebar.page_link("pages/prompt_management.py", label="Prompt Management")
2323
st.sidebar.page_link("pages/user_management.py", label="User Management")
2424
st.sidebar.page_link("pages/admin_responses.py", label="Admin Responses")
25+
st.sidebar.page_link("pages/vector_db_content.py", label="DB Content")
2526
st.sidebar.page_link("app.py", label="About")
2627

2728
def unauthenticated_menu():

src/mvt/pages/vector_db_content.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import streamlit as st
2+
import os
3+
from utils import load_yaml_file_with_db_prompts
4+
from dotenv import load_dotenv, find_dotenv
5+
from langchain_community.vectorstores import FAISS
6+
from langchain_mistralai.embeddings import MistralAIEmbeddings
7+
from menu import menu_with_redirect
8+
9+
# Run the shared menu helper before any page content is rendered.
menu_with_redirect()

# Access gate: anything other than an "admin" user type is turned away.
if st.session_state.user_type != "admin":
    st.warning("You do not have permission to view this page.")
    st.stop()

st.markdown("# Vector DB Content (FAISS)")
17+
18+
# Read the application configuration, then load environment variables from
# whichever .env file find_dotenv() locates.
config_data = load_yaml_file_with_db_prompts("config.yaml")
load_dotenv(find_dotenv())

# Choose the embeddings backend: Mistral when configured, OpenAI otherwise.
if config_data["llm_provider"] != "mistral":
    # Imported here so the OpenAI package is only required on this branch.
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
else:
    embeddings = MistralAIEmbeddings(
        model=config_data["embedding_model"],
        mistral_api_key=os.getenv("MISTRALAI_API_KEY"),
    )
33+
34+
# Load the FAISS vector database from the directory named in the config.
persist_dir = config_data["persist_directory"]
try:
    # SECURITY: FAISS.load_local unpickles the stored docstore, and
    # allow_dangerous_deserialization=True switches off LangChain's guard
    # against that. Only point persist_directory at index files this
    # application wrote itself; a tampered file can run arbitrary code on load.
    vectordb = FAISS.load_local(
        persist_dir,
        embeddings,
        allow_dangerous_deserialization=True,
    )
except Exception as e:
    # Page-level boundary: show the load failure to the admin and halt the run.
    st.error(f"Could not load FAISS content: {e}")
    st.stop()
else:
    # Kept outside the try body so only load errors are reported as such.
    st.success(f"Loaded FAISS content from '{persist_dir}'")
46+
47+
# Copy every document out of the index's docstore. This reads the private
# ``_dict`` attribute and materialises all values, so it may be slow when the
# index holds many documents.
try:
    all_docs = [*vectordb.docstore._dict.values()]
except Exception as e:
    st.error(f"Could not access documents in FAISS index: {e}")
    st.stop()

search_term = st.text_input("Search in vector DB content:", "")
55+
56+
# Check if a document matches the search term
57+
def doc_matches(doc, term):
    """Return True when every whitespace-separated word of *term* occurs in *doc*.

    Matching is a case-insensitive substring search. Each word must appear in
    either the document's ``page_content`` or the string form of at least one
    of its metadata values. An empty or whitespace-only *term* matches every
    document.

    Args:
        doc: Object that may expose ``page_content`` (text) and ``metadata``
            (mapping). Missing or ``None`` attributes are treated as empty.
        term: Raw search text entered by the user.

    Returns:
        bool: True if all search words are found, False otherwise.
    """
    if not term:
        return True

    # str.split() with no argument already discards empty pieces.
    words = [word.lower() for word in term.split()]

    # The "or" fallbacks cover attributes that exist but are set to None.
    content = (getattr(doc, "page_content", "") or "").lower()
    metadata = getattr(doc, "metadata", None) or {}
    metadata_values = [str(value).lower() for value in metadata.values()]

    return all(
        word in content or any(word in value for value in metadata_values)
        for word in words
    )
67+
68+
# Keep only the documents that satisfy the current search text.
filtered_docs = [doc for doc in all_docs if doc_matches(doc, search_term)]

st.markdown(f"### Showing {len(filtered_docs)} / {len(all_docs)} documents")

# Show each document with its metadata and a snippet of its content.
for i, doc in enumerate(filtered_docs, start=1):
    with st.expander(f"Document {i}"):
        st.write("**Metadata:**", doc.metadata)
        text = doc.page_content
        snippet = text[:500] + ("..." if len(text) > 500 else "")
        st.write("**Content snippet:**")
        st.code(snippet)
        # Streamlit does not allow an expander nested inside another expander,
        # so the full text is revealed with a checkbox instead. The explicit
        # key keeps the repeated label unique across documents.
        if st.checkbox("Show full content", key=f"show_full_content_{i}"):
            st.write(text)

0 commit comments

Comments
 (0)