diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bc4920e..b559ba7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,14 +1,13 @@ name: CI - on: push: branches: - main - dev + - agents pull_request: branches: - "**" - jobs: format-frontend: name: 'Format Frontend' @@ -16,40 +15,58 @@ jobs: steps: - name: Checkout Repository uses: actions/checkout@v4 - + + - name: Check if Frontend Directory Exists + id: check_frontend_dir + run: | + if [ -d "src/frontend" ]; then + echo "frontend_dir_exists=true" >> $GITHUB_OUTPUT + else + echo "frontend_dir_exists=false" >> $GITHUB_OUTPUT + fi + - name: Prettify code + if: steps.check_frontend_dir.outputs.frontend_dir_exists == 'true' uses: creyD/prettier_action@v4.3 with: prettier_options: --write src/frontend/**/*.{ts,tsx} - + - name: Post Formatting Check run: git diff --exit-code - + format-core: name: 'Format Core' runs-on: ubuntu-latest - strategy: matrix: python-version: [3.12] - steps: - name: Checkout Repository uses: actions/checkout@v4 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - + - name: Install Dependencies run: | python -m pip install --upgrade pip pip install black - + + - name: Check if Core Directory Exists + id: check_core_dir + run: | + if [ -d "src/core" ]; then + echo "core_dir_exists=true" >> $GITHUB_OUTPUT + else + echo "core_dir_exists=false" >> $GITHUB_OUTPUT + fi + - name: Format Backend + if: steps.check_core_dir.outputs.core_dir_exists == 'true' run: | black src/core - + - name: Post Formatting Check run: git diff --exit-code diff --git a/src/mvt/config.yaml b/src/mvt/config.yaml index 87782a0..a0c269f 100644 --- a/src/mvt/config.yaml +++ b/src/mvt/config.yaml @@ -16,3 +16,4 @@ system_prompt: "You are an assistant for question-answering tasks that are relat use_query_rewriting: false # Set to true to enable query rewriting query_rewriting_prompt: "You are an assistant helping to rewrite 
user queries to make them more specific and effective for searching documents. The context is the Linux Foundation Decentralized Trust former Hyperledger. Please rewrite the human query to be more specific, detailed, and optimized for document retrieval, considering the context mentioned." logo_pth: "https://upload.wikimedia.org/wikipedia/en/thumb/e/e2/The_Founder_Institute_Logo.png/250px-The_Founder_Institute_Logo.png" +nr_retrieved_documents: 5 \ No newline at end of file diff --git a/src/mvt/main.py b/src/mvt/main.py index 8e5e28d..c443fdc 100644 --- a/src/mvt/main.py +++ b/src/mvt/main.py @@ -47,7 +47,7 @@ def get_ragchain(filter): docsearch = FAISS.load_local(config_data["persist_directory"], embeddings, allow_dangerous_deserialization=True) # Define a retriever interface - retriever = docsearch.as_retriever(search_kwargs={"k": 5, "filter": filter}) + retriever = docsearch.as_retriever(search_kwargs={"k": config_data["nr_retrieved_documents"], "filter": filter}) # read prompt string from config file prompt_str = config_data["system_prompt"] diff --git a/src/mvt/menu.py b/src/mvt/menu.py index 1f39544..b7cd644 100644 --- a/src/mvt/menu.py +++ b/src/mvt/menu.py @@ -21,6 +21,7 @@ def authenticated_menu(): st.sidebar.page_link("pages/build_knowledgebase.py", label="Build Knowledge Base") st.sidebar.page_link("pages/prompt_management.py", label="Prompt Management") st.sidebar.page_link("pages/user_management.py", label="User Management") + st.sidebar.page_link("pages/admin_responses.py", label="Admin Responses") st.sidebar.page_link("app.py", label="About") def unauthenticated_menu(): diff --git a/src/mvt/pages/admin_responses.py b/src/mvt/pages/admin_responses.py new file mode 100644 index 0000000..3f6800d --- /dev/null +++ b/src/mvt/pages/admin_responses.py @@ -0,0 +1,156 @@ +import streamlit as st +import os +import re +from menu import menu_with_redirect +from utils import load_yaml_file_with_db_prompts + +# Redirect to app.py if not logged in, otherwise show 
# Page guard. Per the page's original note, menu_with_redirect() redirects to
# app.py if the visitor is not logged in, otherwise it shows the navigation menu.
menu_with_redirect()

# Verify the user's role: anything other than "admin" gets a warning and the
# script run is stopped before any response data is read.
if st.session_state.user_type not in ["admin"]:
    st.warning("You do not have permission to view this page.")
    st.stop()

st.markdown("# Admin Responses")
st.markdown("View previously asked user questions, AI-generated answers, and source documents.")


def parse_responses_file():
    """Parse responses.txt into a list of question/answer/context dicts.

    Each non-blank line of the file is handed to ``parse_response_line`` and
    every truthy result is collected, in file order.

    Returns:
        list: Parsed entries. Empty when the file does not exist, is empty,
        or cannot be read (a read/decode failure is also reported on the page
        with ``st.error``).
    """
    responses_file = "responses.txt"
    responses = []

    try:
        # Open directly instead of checking os.path.exists() first: the file
        # can disappear between the check and the open.
        with open(responses_file, 'r', encoding='utf-8') as f:
            # Stream line by line so a large log is never held in memory twice.
            for raw_line in f:
                line = raw_line.rstrip('\n')
                if not line.strip():
                    continue
                parsed_response = parse_response_line(line)
                if parsed_response:
                    responses.append(parsed_response)
    except FileNotFoundError:
        # A missing log simply means nothing has been recorded yet.
        return []
    except (OSError, UnicodeDecodeError) as e:
        # Same user-facing message as before; partial results are discarded.
        st.error(f"Error reading responses file: {str(e)}")
        return []

    return responses
# A Python string literal as written by repr(): single- OR double-quoted, with
# backslash escapes allowed inside. repr() switches to double quotes whenever
# the text contains an apostrophe, so both forms must be recognised. The whole
# literal (quotes included) is captured so it can be decoded exactly.
_STR_LITERAL = (
    r"('(?:[^'\\]|\\.)*'"
    r'|"(?:[^"\\]|\\.)*")'
)

_INPUT_RE = re.compile(r"'input': " + _STR_LITERAL)
# The answer literal must be immediately followed by "}".
_ANSWER_RE = re.compile(r"'answer': " + _STR_LITERAL + r"(?=\})")
_DOCUMENT_RE = re.compile(
    r"Document\(id='([^']*)', metadata=\{([^}]*)\}, page_content="
    + _STR_LITERAL
    + r"\)"
)
# Legacy metadata matcher: only 'key': 'string value' pairs.
_METADATA_PAIR_RE = re.compile(r"'([^']*)': '([^']*)'")


def _decode_literal(literal):
    """Return the text encoded by a captured, still-quoted string literal."""
    import ast  # local import: the module's import block does not provide it

    try:
        value = ast.literal_eval(literal)
    except (ValueError, SyntaxError, TypeError):
        value = None
    if isinstance(value, str):
        return value
    # Not a valid literal: fall back to the previous best-effort unescaping.
    return literal[1:-1].replace("\\'", "'").replace("\\n", "\n")


def _parse_metadata(doc_id, metadata_str):
    """Build a document's metadata dict from the text between the braces."""
    import ast  # local import: the module's import block does not provide it

    metadata = {'id': doc_id}
    try:
        parsed = ast.literal_eval("{" + metadata_str + "}")
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    if isinstance(parsed, dict):
        # Keeps non-string values (e.g. page numbers) that the regex drops.
        metadata.update((str(key), value) for key, value in parsed.items())
    else:
        # Metadata containing non-literal values: keep the string pairs only.
        metadata.update(_METADATA_PAIR_RE.findall(metadata_str))
    return metadata


def parse_response_line(line):
    """Parse one logged response line that contains Document objects.

    The line is searched for an ``'input'`` string, an ``'answer'`` string
    directly followed by ``}``, and any number of
    ``Document(id='...', metadata={...}, page_content=...)`` entries.

    Args:
        line: One line of the responses log.

    Returns:
        dict: ``{'input': str, 'answer': str, 'context': list}`` where each
        context item is ``{'id', 'metadata', 'page_content'}``. Missing fields
        become "No question found" / "No answer found". If parsing itself
        fails, a placeholder entry describing the error is returned instead
        of raising.
    """
    try:
        # Extract the input (question)
        input_match = _INPUT_RE.search(line)
        question = _decode_literal(input_match.group(1)) if input_match else "No question found"

        # Extract the answer
        answer_match = _ANSWER_RE.search(line)
        answer = _decode_literal(answer_match.group(1)) if answer_match else "No answer found"

        # Extract documents from context
        documents = [
            {
                'id': doc_id,
                'metadata': _parse_metadata(doc_id, metadata_str),
                'page_content': _decode_literal(content_literal),
            }
            for doc_id, metadata_str, content_literal in _DOCUMENT_RE.findall(line)
        ]

        return {
            'input': question,
            'answer': answer,
            'context': documents
        }

    except Exception as e:
        # Deliberate best-effort boundary: one bad line must not break the
        # whole listing, so it is surfaced as a placeholder entry.
        return {
            'input': "Error parsing question",
            'answer': f"Error parsing answer: {str(e)[:100]}",
            'context': []
        }


def display_source_document(doc, index):
    """Display a source document with metadata.

    Args:
        doc: Dict with optional ``'page_content'`` and ``'metadata'`` keys.
        index: Zero-based position; shown to the user as ``index + 1``.
    """
    st.markdown(f"**:page_facing_up: Document {index + 1}**")

    # Display content with st.text rather than st.markdown
    content = doc.get('page_content', '')
    st.markdown("**Content:**")
    st.text(content)

    # Display metadata
    st.markdown("**Metadata:**")
    metadata = doc.get('metadata', {})

    for key, value in metadata.items():
        # str() guards against non-string keys before title-casing.
        st.write(f"- **{str(key).title()}:** {value}")
# Load and display responses
config_data = load_yaml_file_with_db_prompts("config.yaml")
k_value = config_data.get("nr_retrieved_documents")
# k_value is used as a slice bound below. Only a positive integer is a usable
# cap: a string would raise TypeError and a negative number would drop
# documents from the end. Anything else means "show every document".
if isinstance(k_value, bool) or not isinstance(k_value, int) or k_value <= 0:
    k_value = None

responses = parse_responses_file()

if responses:
    st.markdown(f"### Overview ({len(responses)} responses found)")

    # Search functionality
    search_term = st.text_input("Search questions or answers:", placeholder="Enter search term...")

    # Filter responses based on search term (case-insensitive substring match)
    if search_term:
        needle = search_term.lower()  # lowered once, not once per response
        display_responses = [
            response for response in responses
            if needle in response.get('input', '').lower()
            or needle in response.get('answer', '').lower()
        ]
        st.success(f"Found {len(display_responses)} matching responses")
    else:
        display_responses = responses

    st.markdown("---")

    # Display each response, last entry in the file first; the Q-number is the
    # entry's 1-based position within the (filtered) list.
    total_shown = len(display_responses)
    for i, response in enumerate(reversed(display_responses)):
        question = response.get('input', 'No question found')
        answer = response.get('answer', 'No answer found')
        context = response.get('context', [])

        # Display question in an expander, truncating long ones in the label
        question_preview = question[:80] + '...' if len(question) > 80 else question
        with st.expander(f"**Q{total_shown - i}:** {question_preview}", expanded=False):
            if len(question) > 80:
                st.markdown(f"**Full Question:** {question}")

            st.markdown("**Answer:**")
            st.markdown(answer)

            # Display source documents
            if context:
                displayed_docs = context[:k_value] if k_value else context
                st.markdown(f"**Source Documents ({len(displayed_docs)} of {len(context)}):**")

                for doc_idx, doc in enumerate(displayed_docs):
                    display_source_document(doc, doc_idx)
                    # Separator between documents, but not after the last one
                    if doc_idx < len(displayed_docs) - 1:
                        st.markdown("---")
            else:
                st.info("No source documents found.")
else:
    st.info("No responses found. The responses.txt file is empty or doesn't exist.")