Skip to content

Commit b3cdc22

Browse files
authored
Read prompts from external files and added example config files (#140)
Signed-off-by: Gianluca Capuzzi <gianluca.posta78@gmail.com>
1 parent 2ff3abd commit b3cdc22

File tree

7 files changed

+36
-11
lines changed

7 files changed

+36
-11
lines changed

src/mvt/config.yaml.example

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
llm_provider: "mistral" # Options: "openai" or "mistral"
2+
model_name: "mistral-small-2506" # for OpenAI: "gpt-3.5-turbo" or "gpt-4"; for Mistral e.g. "mistral-small-2506", "mistral-medium", or "mistral-large-latest"
3+
embedding_model: "mistral-embed" # "text-embedding-ada-002" for OpenAI or "mistral-embed" for Mistral
4+
dataset_private_path: "./dataset/private"
5+
dataset_public_path: "./dataset/public"
6+
web_urls: "web_urls"
7+
yt_video_links: "yt_video_links"
8+
text_files: "text_files"
9+
pdf_files: "pdf_files"
10+
rtdocs_files: "rtdocs_files"
11+
html_files: "html_files"
12+
persist_directory: "faiss_index"
13+
host: "127.0.0.1"
14+
port: 8080
15+
system_prompt: "./system_prompt.txt"
16+
use_query_rewriting: false # Set to true to enable query rewriting
17+
query_rewriting_prompt: "./query_rewriting_prompt.txt"
18+
prefix_prompt: "I will answer with care, respect, and truth. I will respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity. Avoid response like 'Based on the provided context'"
19+
logo_pth: "https://upload.wikimedia.org/wikipedia/en/thumb/e/e2/The_Founder_Institute_Logo.png/250px-The_Founder_Institute_Logo.png"
20+
nr_retrieved_documents: 5 # Number of documents to retrieve for each query
21+
max_download_retries: 3 # Maximum number of retries for downloading documents
22+
retry_delay_seconds: 2 # Delay in seconds between retries

src/mvt/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from utils import load_yaml_file_with_db_prompts, get_system_prompt
2+
from utils import load_yaml_file_with_db_prompts, get_prompt_from_file
33
from dotenv import load_dotenv, find_dotenv
44
from langchain_mistralai.chat_models import ChatMistralAI
55
from langchain_community.vectorstores import FAISS
@@ -60,7 +60,7 @@ def get_ragchain(filter):
6060
retriever = docsearch.as_retriever(search_kwargs={"k": config_data["nr_retrieved_documents"], "filter": filter})
6161

6262
# read prompt string from config file
63-
prompt_str = get_system_prompt(config_data["system_prompt"])
63+
prompt_str = get_prompt_from_file(config_data["system_prompt"])
6464

6565
# Answer question
6666
qa_system_prompt = (

src/mvt/pages/prompt_management.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import streamlit as st
22
import yaml
3-
from utils import load_yaml_file, load_yaml_file_with_db_prompts
3+
from utils import load_yaml_file, load_yaml_file_with_db_prompts, get_prompt_from_file
44
from database import create_connection, create_prompts_table, save_prompt, get_prompt
55
from menu import menu_with_redirect
66

@@ -31,8 +31,8 @@ def load_default_prompts():
3131
try:
3232
config_data = load_yaml_file("config.yaml")
3333
return {
34-
"system_prompt": config_data.get("system_prompt", ""),
35-
"query_rewriting_prompt": config_data.get("query_rewriting_prompt", "")
34+
"system_prompt": get_prompt_from_file(config_data.get("system_prompt", "")),
35+
"query_rewriting_prompt": get_prompt_from_file(config_data.get("query_rewriting_prompt", ""))
3636
}
3737
except Exception as e:
3838
st.error(f"Error loading config file: {e}")
@@ -90,8 +90,8 @@ def load_prompts_from_db():
9090
# Use the utility function that loads from database with config fallback
9191
config_data = load_yaml_file_with_db_prompts("config.yaml")
9292
st.session_state.current_prompts = {
93-
"system_prompt": config_data.get("system_prompt", ""),
94-
"query_rewriting_prompt": config_data.get("query_rewriting_prompt", "")
93+
"system_prompt": get_prompt_from_file(config_data.get("system_prompt", "")),
94+
"query_rewriting_prompt": get_prompt_from_file(config_data.get("query_rewriting_prompt", ""))
9595
}
9696

9797
# Create columns for better layout

src/mvt/query_rewriting.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from utils import load_yaml_file_with_db_prompts
2+
from utils import load_yaml_file_with_db_prompts, get_prompt_from_file
33
from dotenv import load_dotenv, find_dotenv
44
from langchain_mistralai.chat_models import ChatMistralAI
55

@@ -39,8 +39,7 @@ def query_rewriting_llm(user_query, context="Founder Institute Keystone Chapter"
3939
)
4040

4141
# Read query rewriting prompt from config
42-
with open(config_data["query_rewriting_prompt"], "r") as f:
43-
query_rewriting_prompt = f.read().strip()
42+
query_rewriting_prompt = get_prompt_from_file(config_data["query_rewriting_prompt"])
4443

4544
messages = [
4645
("system", query_rewriting_prompt),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are an assistant helping to rewrite user queries to make them more specific and effective for searching documents. The context is the Linux Foundation Decentralized Trust open-source blockchain community. Please rewrite the human query to be more specific, detailed, and optimized for document retrieval, considering the context mentioned.

src/mvt/system_prompt.txt.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are an assistant for question-answering tasks that are related to Linux Foundation Decentralized Trust, former Hyperledger, open-source blockchain community. Use the following pieces of retrieved context, if any, to answer the question. If you don't know the answer, say that you don't know.

src/mvt/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,12 @@ def load_yaml_file_with_db_prompts(path):
4242
return data
4343

4444
# This function returns a prompt string read from the file specified in the config.
def get_prompt_from_file(prompt_file: str) -> str:
    """Read a prompt from *prompt_file* and return it stripped of surrounding whitespace.

    Args:
        prompt_file: Path to a plain-text file containing the prompt
            (e.g. ``system_prompt`` or ``query_rewriting_prompt`` from the config).

    Returns:
        The file contents with leading/trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Explicit encoding avoids platform-dependent defaults when reading prompt files.
    with open(prompt_file, "r", encoding="utf-8") as f:
        return f.read().strip()
4848

49+
50+
4951
# This function extracts text from HTML while preserving context relationships.
5052
def bs4_html(html):
5153
"""Extract text from HTML while preserving context relationships"""

0 commit comments

Comments
 (0)