chatbot-core/api/models/llama_cpp_provider.py (4 changes: 2 additions & 2 deletions)

@@ -84,7 +84,7 @@ def generate(self, prompt: str, max_tokens: int) -> str:
                 "Check the model path."
             ) from exc

-        except Exception: # pylint: disable=broad-exception-caught
+        except Exception as exc: # pylint: disable=broad-exception-caught
             logger.error(
                 "Unexpected error during LLM generation: %s",
                 exc,
@@ -137,7 +137,7 @@ def _stream_generator():
             logger.error("Invalid model configuration: %s", exc)
             yield "Sorry, model configuration error."

-        except Exception: # pylint: disable=broad-exception-caught
+        except Exception as exc: # pylint: disable=broad-exception-caught
             logger.error(
                 "Unexpected error during LLM streaming. "
                 "Prompt preview: %s...",

chatbot-core/api/prompts/prompt_builder.py (37 changes: 28 additions & 9 deletions)

@@ -2,18 +2,25 @@
 Constructs the prompt used for querying the LLM, including system-level instructions,
 chat history, context retrieved from the knowledge base, and the user's question.
 """

+from typing import Optional
 from langchain.memory import ConversationBufferMemory
-from api.prompts.prompts import SYSTEM_INSTRUCTION
-
-def build_prompt(user_query: str, context: str, memory: ConversationBufferMemory) -> str:
+from api.prompts.prompts import SYSTEM_INSTRUCTION, LOG_ANALYSIS_INSTRUCTION
+
+def build_prompt(
+    user_query: str,
+    context: str,
+    memory: ConversationBufferMemory,
+    log_context: Optional[str] = None
+) -> str:
     """
-    Build the full prompt by combining system instructions, chat history, context,and user question.
+    Build the full prompt by combining system instructions, chat history, context,
+    user question, and optional log data.

     Args:
         user_query (str): The raw question from the user.
         context (str): The relevant retrieved chunks to ground the answer.
         memory (ConversationBufferMemory): LangChain memory holding prior chat turns.
+        log_context (Optional[str]): Raw logs provided by the user (e.g. build failure logs).

     Returns:
         str: A structured prompt for the language model.
@@ -22,17 +29,29 @@ def build_prompt(user_query: str, context: str, memory: ConversationBufferMemory
     history = "\n".join(
         f"{'User' if msg.type == 'human' else 'Jenkins Assistant'}: {msg.content or ''}"
         for msg in memory.chat_memory.messages
-    ) if memory.chat_memory.messages else ""
+    ) if memory.chat_memory.messages else ""
+    else:
+        history = ""

-    prompt = f"""{SYSTEM_INSTRUCTION}
+    # If log context exists, we append it as a specific section
+    if log_context:
+        system_prompt = LOG_ANALYSIS_INSTRUCTION
+        log_section = f"""
+User-Provided Log Data:
+{log_context}
+"""
+    else:
+        # Otherwise, use the standard Friendly Assistant prompt
+        system_prompt = SYSTEM_INSTRUCTION
+        log_section = ""
+
+    prompt = f"""{system_prompt}
 Chat History:
 {history}

-Context:
+Context (Documentation & Knowledge Base):
 {context}

+{log_section}
 User Question:
 {user_query.strip()}

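Reviewer note: a minimal usage sketch of the updated signature. The memory contents and context strings below are hypothetical, not taken from this PR.

```python
# Sketch only: demonstrates how log_context switches the prompt layout.
from langchain.memory import ConversationBufferMemory
from api.prompts.prompt_builder import build_prompt

memory = ConversationBufferMemory()
memory.chat_memory.add_user_message("My Windows build keeps failing.")

# Without log_context: SYSTEM_INSTRUCTION is used and no log section appears.
default_prompt = build_prompt("How do I configure agents?", "<retrieved docs>", memory)

# With log_context: LOG_ANALYSIS_INSTRUCTION replaces the system prompt and a
# "User-Provided Log Data:" section is inserted before "User Question:".
log_prompt = build_prompt(
    "Why did my build fail?",
    "<retrieved docs>",
    memory,
    log_context="Build step 'Execute Windows batch command' marked build as failure",
)
assert "User-Provided Log Data:" in log_prompt
```
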
chatbot-core/api/prompts/prompts.py (35 changes: 34 additions & 1 deletion)

@@ -44,7 +44,7 @@
 ###
 Here are some examples:

-Query: How do I install Jenkins on Ubuntu?
+Query: How do I install Jenkins on Ubuntu?
 Answer: SIMPLE

 Query: How do I install Jenkins and configure it to use the GitHub plugin?
@@ -258,3 +258,36 @@

 Relevance Analysis:
 """
+LOG_ANALYSIS_INSTRUCTION = """
+You are an expert Jenkins Log Analyzer.
+
+Your SOLE goal is to identify the root cause of the build failure based on the provided logs.
+
+PRIORITY RULES:
+1. Focus 100% on the "User-Provided Log Data".
+2. Identify specific error messages, exceptions, or exit codes (e.g., "Build step 'Execute Windows batch command' marked build as failure").
+3. Only use the "Context" (documentation) if it helps explain the specific error found in the logs. If the context is unrelated, IGNORE IT.
+4. Do not be conversational. Go straight to the point: "The build failed because..."
+
+If the logs do not show a clear error, state: "I cannot find a specific error in the provided logs."
+"""
+LOG_SUMMARY_PROMPT = """
+You are an expert system debugger.
+Your task is to extract a concise search query from the provided build logs.
+Identify the specific error message, exception type, or exit code.
+
+Do not explain the error. Just output the error signature.
+
+Example 1:
+Logs: "Check failed: 'test_pass'. Exit code 1."
+Output: Build failure exit code 1
+
+Example 2:
+Logs: "Exception in thread main java.lang.NullPointerException at com.example..."
+Output: java.lang.NullPointerException
+
+Logs:
+{log_data}
+
+Search Query:
+"""

chatbot-core/api/services/chat_service.py (72 changes: 54 additions & 18 deletions)

@@ -15,6 +15,7 @@
     QUERY_CLASSIFIER_PROMPT,
     RETRIEVER_AGENT_PROMPT,
     SPLIT_QUERY_PROMPT,
+    LOG_SUMMARY_PROMPT,
 )
 from api.services.memory import get_session
 from api.services.file_service import format_file_context
@@ -32,6 +33,11 @@
 retrieval_config = CONFIG["retrieval"]
 CODE_BLOCK_PLACEHOLDER_PATTERN = r"\[\[(?:CODE_BLOCK|CODE_SNIPPET)_(\d+)\]\]"

+LOG_ANALYSIS_PATTERN = re.compile(
+    r"Here are the last \d+ characters of the log:\s*```\s*(.*?)\s*```\s*(.*)",
+    re.DOTALL
+)
+

 def get_chatbot_reply(
     session_id: str,
@@ -53,43 +59,60 @@ def get_chatbot_reply(
     logger.info("New message from session '%s'", session_id)
     logger.info("Handling the user query: %s", user_input)

-    if files:
-        logger.info("Processing %d uploaded file(s)", len(files))
-
     memory = get_session(session_id)
     if memory is None:
-        raise RuntimeError(
-            f"Session '{session_id}' not found in the memory store.")
+        raise RuntimeError(f"Session '{session_id}' not found in the memory store.")

     context = retrieve_context(user_input)
     logger.info("Context retrieved: %s", context)

-    # Add file context if files are provided
-    file_context = ""
-    if files:
-        file_dicts = [file.model_dump() for file in files]
-        file_context = format_file_context(file_dicts)
-        if file_context:
-            logger.info("File context added: %d characters", len(file_context))
-            context = f"{context}\n\n[User Uploaded Files]\n{file_context}"
+    # Process file context if files are provided
+    context = _process_file_context(context, files)

     prompt = build_prompt(user_input, context, memory)

     logger.info("Generating answer with prompt: %s", prompt)
     reply = generate_answer(prompt)

-    # Include file info in memory message
-    user_message = user_input
-    if files:
-        file_names = [f.filename for f in files]
-        user_message = f"{user_input}\n[Attached files: {', '.join(file_names)}]"
+    # Format user message with file info for memory
+    user_message = _format_user_message_for_memory(user_input, files)

     memory.chat_memory.add_user_message(user_message)
     memory.chat_memory.add_ai_message(reply)

     return ChatResponse(reply=reply)


+def _process_file_context(context: str, files: Optional[List[FileAttachment]]) -> str:
+    """
+    Helper function to process uploaded files and append them to the context.
+    """
+    if not files:
+        return context
+
+    logger.info("Processing %d uploaded file(s)", len(files))
+    file_dicts = [file.model_dump() for file in files]
+    file_context = format_file_context(file_dicts)
+
+    if file_context:
+        logger.info("File context added: %d characters", len(file_context))
+        return f"{context}\n\n[User Uploaded Files]\n{file_context}"
+
+    return context
+
+
+def _format_user_message_for_memory(user_input: str, files: Optional[List[FileAttachment]]) -> str:
+    """
+    Helper function to format the user message for memory storage,
+    appending the names of attached files.
+    """
+    if not files:
+        return user_input
+
+    file_names = [f.filename for f in files]
+    return f"{user_input}\n[Attached files: {', '.join(file_names)}]"
+
+
 def get_chatbot_reply_new_architecture(
     session_id: str,
     user_input: str) -> ChatResponse:
@@ -504,3 +527,16 @@ def _extract_relevance_score(response: str) -> str:
         relevance_score = 0

     return relevance_score
+
+def _generate_search_query_from_logs(log_text: str) -> str:
+    """
+    Uses the LLM to extract a concise error signature from the logs
+    to use as a search query for the vector database.
+    """
+    # Use .format() directly since we are using generate_answer
+    prompt = LOG_SUMMARY_PROMPT.format(log_data=log_text)
+
+    # Generate response using the existing function in this file
+    search_query = generate_answer(prompt)
+
+    return search_query.strip()

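Reviewer note: a sketch of how the new pieces could compose at request time. This diff does not show the call site, so the wiring below (including the sanitize_logs step from api/tools/sanitizer.py introduced in this PR) is illustrative only; module-level names are assumed to be the ones already imported in chat_service.py.

```python
# Illustrative only: LOG_ANALYSIS_PATTERN splits a pasted message into log body
# and question; the summarized error signature then drives retrieval.
from api.tools.sanitizer import sanitize_logs

FENCE = "`" * 3  # stands in for the literal code fence in the user's message
message = (
    "Here are the last 5000 characters of the log:\n"
    f"{FENCE}\nException in thread main java.lang.NullPointerException\n{FENCE}\n"
    "Why did my build fail?"
)

session_id = "demo-session"          # hypothetical session
memory = get_session(session_id)

match = LOG_ANALYSIS_PATTERN.match(message)
if match:
    log_text = sanitize_logs(match.group(1))  # redact secrets before any LLM call
    question = match.group(2)
    search_query = _generate_search_query_from_logs(log_text)
    context = retrieve_context(search_query)   # existing retrieval helper
    prompt = build_prompt(question, context, memory, log_context=log_text)
```
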
chatbot-core/api/tools/sanitizer.py (39 changes: 39 additions & 0 deletions)

@@ -0,0 +1,39 @@
+"""Module for sanitizing logs by redacting sensitive information."""
+import re
+
+def sanitize_logs(log_text: str) -> str:
+    """
+    Scans the input text for common secret patterns (API keys, passwords, tokens)
+    and replaces them with [REDACTED].
+    """
+    patterns = [
+        # Generic "password=" or "pwd=" patterns (case insensitive)
+        (
+            r'(?i)(password|passwd|pwd|secret|access_token|api_key|client_secret)'
+            r'\s*[:=]\s*([^\s]+)',
+            r'\1=[REDACTED]'
+        ),
+
+        # AWS Access Key ID (AKI...)
+        (r'(?<![A-Z0-9])[A-Z0-9]{20}(?![A-Z0-9])', r'[REDACTED_AWS_KEY]'),
+
+        # Generic Bearer Token
+        (r'(?i)(Bearer)\s+[a-zA-Z0-9\-\._~+/]+=*', r'\1 [REDACTED_TOKEN]'),
+
+        # GitHub Tokens (ghp_...)
+        (r'ghp_[a-zA-Z0-9]{36}', r'[REDACTED_GITHUB_TOKEN]'),
+
+        # Private Key Blocks
+        (r'-----BEGIN [A-Z]+ PRIVATE KEY-----.*?-----END [A-Z]+ PRIVATE KEY-----',
+         r'[REDACTED_PRIVATE_KEY]'
+        ),
+
+        # Docker Login Flags (-p password)
+        (r'(docker\s+login.*?-p\s+)([^\s]+)', r'\1[REDACTED]')
+    ]
+
+    sanitized_text = log_text
+    for pattern, replacement in patterns:
+        sanitized_text = re.sub(pattern, replacement, sanitized_text, flags=re.DOTALL)
+
+    return sanitized_text

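Reviewer note: a quick before/after check of sanitize_logs against representative lines; all sample secrets below are fabricated.

```python
from api.tools.sanitizer import sanitize_logs

sample = "\n".join([
    "password=hunter2",
    "docker login -u ci -p s3cr3t registry.example.com",
    "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature",
    "ghp_" + "a" * 36,  # fabricated GitHub token
])

print(sanitize_logs(sample))
# Expected, given the patterns above:
#   password=[REDACTED]
#   docker login -u ci -p [REDACTED] registry.example.com
#   Authorization: Bearer [REDACTED_TOKEN]
#   [REDACTED_GITHUB_TOKEN]
```
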
chatbot-core/rag/embedding/bm25_indexer.py (5 changes: 3 additions & 2 deletions)

@@ -104,5 +104,6 @@ def get(self, index_name: str):
         logger= LoggerFactory.instance().get_logger("bm25indexer")
     )

-if not CONFIG["is_test_mode"]:
-    indexer.build()
+if __name__ == "__main__":
+    if not CONFIG["is_test_mode"]:
+        indexer.build()

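Reviewer note: the effect of the new guard, assuming chatbot-core/ is the import root. Importing the module (for example from a test) no longer builds the index as a side effect.

```python
# Side-effect-free import now that the build is behind the __main__ guard:
from rag.embedding import bm25_indexer  # no index build triggered

# The build still runs when the file is executed directly, outside test mode:
#   python rag/embedding/bm25_indexer.py
```
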
chatbot-core/tests/unit/chunking/test_common.py (4 changes: 2 additions & 2 deletions)

@@ -35,8 +35,8 @@ def test_save_chunks_handles_error(mocker):
     def raise_oserror(*args, **kwargs):
         raise OSError("Disk full")

-    with mocker.patch("builtins.open", side_effect=raise_oserror):
-        save_chunks(fake_path, data, logger)
+    mocker.patch("builtins.open", side_effect=raise_oserror)
+    save_chunks(fake_path, data, logger)

     logger.error.assert_called_once()
     assert "File error while writing" in logger.error.call_args[0][0]

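Reviewer note on this fix: pytest-mock's mocker.patch starts the patch immediately and undoes it at test teardown, so it should not be wrapped in a with block; the old code was entering the returned MagicMock rather than a patcher. For comparison, the unittest.mock equivalent would need explicit scoping:

```python
from unittest import mock

# Only unittest.mock.patch is used as a context manager:
with mock.patch("builtins.open", side_effect=OSError("Disk full")):
    save_chunks(fake_path, data, logger)
```
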
chatbot-core/tests/unit/prompts/test_prompt_builder.py (10 changes: 7 additions & 3 deletions)

@@ -79,16 +79,20 @@ def test_build_prompt_with_none_memory():
 def get_prompt_indexes(prompt: str) -> tuple[int, int, int, int]:
     """Helper to extract section positions in the prompt."""
     chat_idx = prompt.index("Chat History:")
-    context_idx = prompt.index("Context:")
+    context_idx = prompt.index("Context (Documentation & Knowledge Base):")
     question_idx = prompt.index("User Question:")
     answer_idx = prompt.index("Answer:")

     return chat_idx, context_idx, question_idx, answer_idx

 def get_prompt_sections(prompt: str) -> tuple[str, str, str]:
     """Helper to extract prompt sections by label."""
-    history_section = prompt.split("Chat History:")[1].split("Context:")[0]
-    context_section = prompt.split("Context:")[1].split("User Question:")[0]
+    history_part = prompt.split("Chat History:")[1]
+    history_section = history_part.split("Context (Documentation & Knowledge Base):")[0]
+
+    context_part = prompt.split("Context (Documentation & Knowledge Base):")[1]
+    context_section = context_part.split("User Question:")[0]
+
     question_section = prompt.split("User Question:")[1].split("Answer:")[0]

     return history_section, context_section, question_section