Skip to content

Commit b3cdc22

Browse files
authored
Read prompts from external files and added example config files (#140)
Signed-off-by: Gianluca Capuzzi <gianluca.posta78@gmail.com>
1 parent 2ff3abd commit b3cdc22

File tree

7 files changed

+36
-11
lines changed

7 files changed

+36
-11
lines changed

src/mvt/config.yaml.example

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
llm_provider: "mistral" # Options: "openai" or "mistral"
2+
model_name: "mistral-small-2506" # for OpenAI: "gpt-3.5-turbo" or "gpt-4"; for Mistral e.g. "mistral-small-2506", "mistral-medium", or "mistral-large-latest"
3+
embedding_model: "mistral-embed" # "text-embedding-ada-002" for OpenAI or "mistral-embed" for Mistral
4+
dataset_private_path: "./dataset/private"
5+
dataset_public_path: "./dataset/public"
6+
web_urls: "web_urls"
7+
yt_video_links: "yt_video_links"
8+
text_files: "text_files"
9+
pdf_files: "pdf_files"
10+
rtdocs_files: "rtdocs_files"
11+
html_files: "html_files"
12+
persist_directory: "faiss_index"
13+
host: "127.0.0.1"
14+
port: 8080
15+
system_prompt: "./system_prompt.txt"
16+
use_query_rewriting: false # Set to true to enable query rewriting
17+
query_rewriting_prompt: "./query_rewriting_prompt.txt"
18+
prefix_prompt: "I will answer with care, respect, and truth. I will respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity. Avoid response like 'Based on the provided context'"
19+
logo_pth: "https://upload.wikimedia.org/wikipedia/en/thumb/e/e2/The_Founder_Institute_Logo.png/250px-The_Founder_Institute_Logo.png"
20+
nr_retrieved_documents: 5 # Number of documents to retrieve for each query
21+
max_download_retries: 3 # Maximum number of retries for downloading documents
22+
retry_delay_seconds: 2 # Delay in seconds between retries

src/mvt/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from utils import load_yaml_file_with_db_prompts, get_system_prompt
2+
from utils import load_yaml_file_with_db_prompts, get_prompt_from_file
33
from dotenv import load_dotenv, find_dotenv
44
from langchain_mistralai.chat_models import ChatMistralAI
55
from langchain_community.vectorstores import FAISS
@@ -60,7 +60,7 @@ def get_ragchain(filter):
6060
retriever = docsearch.as_retriever(search_kwargs={"k": config_data["nr_retrieved_documents"], "filter": filter})
6161

6262
# read prompt string from config file
63-
prompt_str = get_system_prompt(config_data["system_prompt"])
63+
prompt_str = get_prompt_from_file(config_data["system_prompt"])
6464

6565
# Answer question
6666
qa_system_prompt = (

src/mvt/pages/prompt_management.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import streamlit as st
22
import yaml
3-
from utils import load_yaml_file, load_yaml_file_with_db_prompts
3+
from utils import load_yaml_file, load_yaml_file_with_db_prompts, get_prompt_from_file
44
from database import create_connection, create_prompts_table, save_prompt, get_prompt
55
from menu import menu_with_redirect
66

@@ -31,8 +31,8 @@ def load_default_prompts():
3131
try:
3232
config_data = load_yaml_file("config.yaml")
3333
return {
34-
"system_prompt": config_data.get("system_prompt", ""),
35-
"query_rewriting_prompt": config_data.get("query_rewriting_prompt", "")
34+
"system_prompt": get_prompt_from_file(config_data.get("system_prompt", "")),
35+
"query_rewriting_prompt": get_prompt_from_file(config_data.get("query_rewriting_prompt", ""))
3636
}
3737
except Exception as e:
3838
st.error(f"Error loading config file: {e}")
@@ -90,8 +90,8 @@ def load_prompts_from_db():
9090
# Use the utility function that loads from database with config fallback
9191
config_data = load_yaml_file_with_db_prompts("config.yaml")
9292
st.session_state.current_prompts = {
93-
"system_prompt": config_data.get("system_prompt", ""),
94-
"query_rewriting_prompt": config_data.get("query_rewriting_prompt", "")
93+
"system_prompt": get_prompt_from_file(config_data.get("system_prompt", "")),
94+
"query_rewriting_prompt": get_prompt_from_file(config_data.get("query_rewriting_prompt", ""))
9595
}
9696

9797
# Create columns for better layout

src/mvt/query_rewriting.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from utils import load_yaml_file_with_db_prompts
2+
from utils import load_yaml_file_with_db_prompts, get_prompt_from_file
33
from dotenv import load_dotenv, find_dotenv
44
from langchain_mistralai.chat_models import ChatMistralAI
55

@@ -39,8 +39,7 @@ def query_rewriting_llm(user_query, context="Founder Institute Keystone Chapter"
3939
)
4040

4141
# Read query rewriting prompt from config
42-
with open(config_data["query_rewriting_prompt"], "r") as f:
43-
query_rewriting_prompt = f.read().strip()
42+
query_rewriting_prompt = get_prompt_from_file(config_data["query_rewriting_prompt"])
4443

4544
messages = [
4645
("system", query_rewriting_prompt),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are an assistant helping to rewrite user queries to make them more specific and effective for searching documents. The context is the Linux Foundation Decentralized Trust open-source blockchain community. Please rewrite the human query to be more specific, detailed, and optimized for document retrieval, considering the context mentioned.

src/mvt/system_prompt.txt.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are an assistant for question-answering tasks that are related to Linux Foundation Decentralized Trust, former Hyperledger, open-source blockchain community. Use the following pieces of retrieved context, if any, to answer the question. If you don't know the answer, say that you don't know.

src/mvt/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,12 @@ def load_yaml_file_with_db_prompts(path):
4242
return data
4343

4444
# This function returns a prompt string read from the file specified in the config.
def get_prompt_from_file(prompt_file: str) -> str:
    """Read a prompt from *prompt_file* and return it stripped of surrounding whitespace.

    Args:
        prompt_file: Path to a plain-text file containing the prompt
            (e.g. ``system_prompt`` or ``query_rewriting_prompt`` from the config).

    Returns:
        The file contents with leading/trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Explicit encoding avoids platform-dependent defaults when reading prompt files.
    with open(prompt_file, "r", encoding="utf-8") as f:
        return f.read().strip()
4848

49+
50+
4951
# This function extracts text from HTML while preserving context relationships.
5052
def bs4_html(html):
5153
"""Extract text from HTML while preserving context relationships"""

0 commit comments

Comments
 (0)