chatbot-core/api/models/llama_cpp_provider.py (4 changes: 2 additions & 2 deletions)

@@ -84,7 +84,7 @@ def generate(self, prompt: str, max_tokens: int) -> str:
                 "Check the model path."
             ) from exc

-        except Exception: # pylint: disable=broad-exception-caught
+        except Exception as exc: # pylint: disable=broad-exception-caught
             logger.error(
                 "Unexpected error during LLM generation: %s",
                 exc,
@@ -137,7 +137,7 @@ def _stream_generator():
             logger.error("Invalid model configuration: %s", exc)
             yield "Sorry, model configuration error."

-        except Exception: # pylint: disable=broad-exception-caught
+        except Exception as exc: # pylint: disable=broad-exception-caught
             logger.error(
                 "Unexpected error during LLM streaming. "
                 "Prompt preview: %s...",

chatbot-core/api/prompts/prompt_builder.py (37 changes: 28 additions & 9 deletions)

@@ -2,18 +2,25 @@
 Constructs the prompt used for querying the LLM, including system-level instructions,
 chat history, context retrieved from the knowledge base, and the user's question.
 """

+from typing import Optional
 from langchain.memory import ConversationBufferMemory
-from api.prompts.prompts import SYSTEM_INSTRUCTION
-
-def build_prompt(user_query: str, context: str, memory: ConversationBufferMemory) -> str:
+from api.prompts.prompts import SYSTEM_INSTRUCTION, LOG_ANALYSIS_INSTRUCTION
+
+def build_prompt(
+    user_query: str,
+    context: str,
+    memory: ConversationBufferMemory,
+    log_context: Optional[str] = None
+) -> str:
     """
-    Build the full prompt by combining system instructions, chat history, context,and user question.
+    Build the full prompt by combining system instructions, chat history, context,
+    user question, and optional log data.

     Args:
         user_query (str): The raw question from the user.
         context (str): The relevant retrieved chunks to ground the answer.
         memory (ConversationBufferMemory): LangChain memory holding prior chat turns.
+        log_context (Optional[str]): Raw logs provided by the user (e.g. build failure logs).

     Returns:
         str: A structured prompt for the language model.
@@ -22,17 +29,29 @@ def build_prompt(user_query: str, context: str, memory: ConversationBufferMemory
     history = "\n".join(
         f"{'User' if msg.type == 'human' else 'Jenkins Assistant'}: {msg.content or ''}"
         for msg in memory.chat_memory.messages
-    ) if memory.chat_memory.messages else ""
+    ) if memory.chat_memory.messages else ""
+    else:
+        history = ""

-    prompt = f"""{SYSTEM_INSTRUCTION}
+    # If log context exists, we append it as a specific section
+    if log_context:
+        system_prompt = LOG_ANALYSIS_INSTRUCTION
+        log_section = f"""
+User-Provided Log Data:
+{log_context}
+"""
+    else:
+        # Otherwise, use the standard Friendly Assistant prompt
+        system_prompt = SYSTEM_INSTRUCTION
+        log_section = ""
+
+    prompt = f"""{system_prompt}
 Chat History:
 {history}

-Context:
+Context (Documentation & Knowledge Base):
 {context}

+{log_section}
 User Question:
 {user_query.strip()}

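Reviewer note: a minimal usage sketch of the updated signature. The memory contents and context strings below are hypothetical, not taken from this PR.

```python
# Sketch only: demonstrates how log_context switches the prompt layout.
from langchain.memory import ConversationBufferMemory
from api.prompts.prompt_builder import build_prompt

memory = ConversationBufferMemory()
memory.chat_memory.add_user_message("My Windows build keeps failing.")

# Without log_context: SYSTEM_INSTRUCTION is used and no log section appears.
default_prompt = build_prompt("How do I configure agents?", "<retrieved docs>", memory)

# With log_context: LOG_ANALYSIS_INSTRUCTION replaces the system prompt and a
# "User-Provided Log Data:" section is inserted before "User Question:".
log_prompt = build_prompt(
    "Why did my build fail?",
    "<retrieved docs>",
    memory,
    log_context="Build step 'Execute Windows batch command' marked build as failure",
)
assert "User-Provided Log Data:" in log_prompt
```
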
chatbot-core/api/prompts/prompts.py (35 changes: 34 additions & 1 deletion)

@@ -44,7 +44,7 @@
 ###
 Here are some examples:

-Query: How do I install Jenkins on Ubuntu?
+Query: How do I install Jenkins on Ubuntu?
 Answer: SIMPLE

 Query: How do I install Jenkins and configure it to use the GitHub plugin?
@@ -258,3 +258,36 @@

 Relevance Analysis:
 """
+LOG_ANALYSIS_INSTRUCTION = """
+You are an expert Jenkins Log Analyzer.
+
+Your SOLE goal is to identify the root cause of the build failure based on the provided logs.
+
+PRIORITY RULES:
+1. Focus 100% on the "User-Provided Log Data".
+2. Identify specific error messages, exceptions, or exit codes (e.g., "Build step 'Execute Windows batch command' marked build as failure").
+3. Only use the "Context" (documentation) if it helps explain the specific error found in the logs. If the context is unrelated, IGNORE IT.
+4. Do not be conversational. Go straight to the point: "The build failed because..."
+
+If the logs do not show a clear error, state: "I cannot find a specific error in the provided logs."
+"""
+LOG_SUMMARY_PROMPT = """
+You are an expert system debugger.
+Your task is to extract a concise search query from the provided build logs.
+Identify the specific error message, exception type, or exit code.
+
+Do not explain the error. Just output the error signature.
+
+Example 1:
+Logs: "Check failed: 'test_pass'. Exit code 1."
+Output: Build failure exit code 1
+
+Example 2:
+Logs: "Exception in thread main java.lang.NullPointerException at com.example..."
+Output: java.lang.NullPointerException
+
+Logs:
+{log_data}
+
+Search Query:
+"""

chatbot-core/api/services/chat_service.py (72 changes: 54 additions & 18 deletions)

@@ -15,6 +15,7 @@
     QUERY_CLASSIFIER_PROMPT,
     RETRIEVER_AGENT_PROMPT,
     SPLIT_QUERY_PROMPT,
+    LOG_SUMMARY_PROMPT,
 )
 from api.services.memory import get_session
 from api.services.file_service import format_file_context
@@ -32,6 +33,11 @@
 retrieval_config = CONFIG["retrieval"]
 CODE_BLOCK_PLACEHOLDER_PATTERN = r"\[\[(?:CODE_BLOCK|CODE_SNIPPET)_(\d+)\]\]"

+LOG_ANALYSIS_PATTERN = re.compile(
+    r"Here are the last \d+ characters of the log:\s*```\s*(.*?)\s*```\s*(.*)",
+    re.DOTALL
+)
+

 def get_chatbot_reply(
     session_id: str,
@@ -53,43 +59,60 @@ def get_chatbot_reply(
     logger.info("New message from session '%s'", session_id)
     logger.info("Handling the user query: %s", user_input)

-    if files:
-        logger.info("Processing %d uploaded file(s)", len(files))
-
     memory = get_session(session_id)
     if memory is None:
-        raise RuntimeError(
-            f"Session '{session_id}' not found in the memory store.")
+        raise RuntimeError(f"Session '{session_id}' not found in the memory store.")

     context = retrieve_context(user_input)
     logger.info("Context retrieved: %s", context)

-    # Add file context if files are provided
-    file_context = ""
-    if files:
-        file_dicts = [file.model_dump() for file in files]
-        file_context = format_file_context(file_dicts)
-        if file_context:
-            logger.info("File context added: %d characters", len(file_context))
-            context = f"{context}\n\n[User Uploaded Files]\n{file_context}"
+    # Process file context if files are provided
+    context = _process_file_context(context, files)

     prompt = build_prompt(user_input, context, memory)

     logger.info("Generating answer with prompt: %s", prompt)
     reply = generate_answer(prompt)

-    # Include file info in memory message
-    user_message = user_input
-    if files:
-        file_names = [f.filename for f in files]
-        user_message = f"{user_input}\n[Attached files: {', '.join(file_names)}]"
+    # Format user message with file info for memory
+    user_message = _format_user_message_for_memory(user_input, files)

     memory.chat_memory.add_user_message(user_message)
     memory.chat_memory.add_ai_message(reply)

     return ChatResponse(reply=reply)


+def _process_file_context(context: str, files: Optional[List[FileAttachment]]) -> str:
+    """
+    Helper function to process uploaded files and append them to the context.
+    """
+    if not files:
+        return context
+
+    logger.info("Processing %d uploaded file(s)", len(files))
+    file_dicts = [file.model_dump() for file in files]
+    file_context = format_file_context(file_dicts)
+
+    if file_context:
+        logger.info("File context added: %d characters", len(file_context))
+        return f"{context}\n\n[User Uploaded Files]\n{file_context}"
+
+    return context
+
+
+def _format_user_message_for_memory(user_input: str, files: Optional[List[FileAttachment]]) -> str:
+    """
+    Helper function to format the user message for memory storage,
+    appending the names of attached files.
+    """
+    if not files:
+        return user_input
+
+    file_names = [f.filename for f in files]
+    return f"{user_input}\n[Attached files: {', '.join(file_names)}]"
+
+
 def get_chatbot_reply_new_architecture(
     session_id: str,
     user_input: str) -> ChatResponse:
@@ -504,3 +527,16 @@ def _extract_relevance_score(response: str) -> str:
         relevance_score = 0

     return relevance_score
+
+def _generate_search_query_from_logs(log_text: str) -> str:
+    """
+    Uses the LLM to extract a concise error signature from the logs
+    to use as a search query for the vector database.
+    """
+    # Use .format() directly since we are using generate_answer
+    prompt = LOG_SUMMARY_PROMPT.format(log_data=log_text)
+
+    # Generate response using the existing function in this file
+    search_query = generate_answer(prompt)
+
+    return search_query.strip()

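Reviewer note: a sketch of how the new pieces could compose at request time. This diff does not show the call site, so the wiring below (including the sanitize_logs step from api/tools/sanitizer.py introduced in this PR) is illustrative only; module-level names are assumed to be the ones already imported in chat_service.py.

```python
# Illustrative only: LOG_ANALYSIS_PATTERN splits a pasted message into log body
# and question; the summarized error signature then drives retrieval.
from api.tools.sanitizer import sanitize_logs

FENCE = "`" * 3  # stands in for the literal code fence in the user's message
message = (
    "Here are the last 5000 characters of the log:\n"
    f"{FENCE}\nException in thread main java.lang.NullPointerException\n{FENCE}\n"
    "Why did my build fail?"
)

session_id = "demo-session"          # hypothetical session
memory = get_session(session_id)

match = LOG_ANALYSIS_PATTERN.match(message)
if match:
    log_text = sanitize_logs(match.group(1))  # redact secrets before any LLM call
    question = match.group(2)
    search_query = _generate_search_query_from_logs(log_text)
    context = retrieve_context(search_query)   # existing retrieval helper
    prompt = build_prompt(question, context, memory, log_context=log_text)
```
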
chatbot-core/api/tools/sanitizer.py (39 changes: 39 additions & 0 deletions)

@@ -0,0 +1,39 @@
+"""Module for sanitizing logs by redacting sensitive information."""
+import re
+
+def sanitize_logs(log_text: str) -> str:
+    """
+    Scans the input text for common secret patterns (API keys, passwords, tokens)
+    and replaces them with [REDACTED].
+    """
+    patterns = [
+        # Generic "password=" or "pwd=" patterns (case insensitive)
+        (
+            r'(?i)(password|passwd|pwd|secret|access_token|api_key|client_secret)'
+            r'\s*[:=]\s*([^\s]+)',
+            r'\1=[REDACTED]'
+        ),
+
+        # AWS Access Key ID (AKI...)
+        (r'(?<![A-Z0-9])[A-Z0-9]{20}(?![A-Z0-9])', r'[REDACTED_AWS_KEY]'),
+
+        # Generic Bearer Token
+        (r'(?i)(Bearer)\s+[a-zA-Z0-9\-\._~+/]+=*', r'\1 [REDACTED_TOKEN]'),
+
+        # GitHub Tokens (ghp_...)
+        (r'ghp_[a-zA-Z0-9]{36}', r'[REDACTED_GITHUB_TOKEN]'),
+
+        # Private Key Blocks
+        (r'-----BEGIN [A-Z]+ PRIVATE KEY-----.*?-----END [A-Z]+ PRIVATE KEY-----',
+         r'[REDACTED_PRIVATE_KEY]'
+        ),
+
+        # Docker Login Flags (-p password)
+        (r'(docker\s+login.*?-p\s+)([^\s]+)', r'\1[REDACTED]')
+    ]
+
+    sanitized_text = log_text
+    for pattern, replacement in patterns:
+        sanitized_text = re.sub(pattern, replacement, sanitized_text, flags=re.DOTALL)
+
+    return sanitized_text

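Reviewer note: a quick before/after check of sanitize_logs against representative lines; all sample secrets below are fabricated.

```python
from api.tools.sanitizer import sanitize_logs

sample = "\n".join([
    "password=hunter2",
    "docker login -u ci -p s3cr3t registry.example.com",
    "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature",
    "ghp_" + "a" * 36,  # fabricated GitHub token
])

print(sanitize_logs(sample))
# Expected, given the patterns above:
#   password=[REDACTED]
#   docker login -u ci -p [REDACTED] registry.example.com
#   Authorization: Bearer [REDACTED_TOKEN]
#   [REDACTED_GITHUB_TOKEN]
```
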
chatbot-core/rag/embedding/bm25_indexer.py (5 changes: 3 additions & 2 deletions)

@@ -104,5 +104,6 @@ def get(self, index_name: str):
         logger= LoggerFactory.instance().get_logger("bm25indexer")
     )

-if not CONFIG["is_test_mode"]:
-    indexer.build()
+if __name__ == "__main__":
+    if not CONFIG["is_test_mode"]:
+        indexer.build()

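Reviewer note: the effect of the new guard, assuming chatbot-core/ is the import root. Importing the module (for example from a test) no longer builds the index as a side effect.

```python
# Side-effect-free import now that the build is behind the __main__ guard:
from rag.embedding import bm25_indexer  # no index build triggered

# The build still runs when the file is executed directly, outside test mode:
#   python rag/embedding/bm25_indexer.py
```
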
chatbot-core/tests/unit/chunking/test_common.py (4 changes: 2 additions & 2 deletions)

@@ -35,8 +35,8 @@ def test_save_chunks_handles_error(mocker):
     def raise_oserror(*args, **kwargs):
         raise OSError("Disk full")

-    with mocker.patch("builtins.open", side_effect=raise_oserror):
-        save_chunks(fake_path, data, logger)
+    mocker.patch("builtins.open", side_effect=raise_oserror)
+    save_chunks(fake_path, data, logger)

     logger.error.assert_called_once()
     assert "File error while writing" in logger.error.call_args[0][0]

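Reviewer note on this fix: pytest-mock's mocker.patch starts the patch immediately and undoes it at test teardown, so it should not be wrapped in a with block; the old code was entering the returned MagicMock rather than a patcher. For comparison, the unittest.mock equivalent would need explicit scoping:

```python
from unittest import mock

# Only unittest.mock.patch is used as a context manager:
with mock.patch("builtins.open", side_effect=OSError("Disk full")):
    save_chunks(fake_path, data, logger)
```
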
chatbot-core/tests/unit/prompts/test_prompt_builder.py (10 changes: 7 additions & 3 deletions)

@@ -79,16 +79,20 @@ def test_build_prompt_with_none_memory():
 def get_prompt_indexes(prompt: str) -> tuple[int, int, int, int]:
     """Helper to extract section positions in the prompt."""
     chat_idx = prompt.index("Chat History:")
-    context_idx = prompt.index("Context:")
+    context_idx = prompt.index("Context (Documentation & Knowledge Base):")
     question_idx = prompt.index("User Question:")
     answer_idx = prompt.index("Answer:")

     return chat_idx, context_idx, question_idx, answer_idx

 def get_prompt_sections(prompt: str) -> tuple[str, str, str]:
     """Helper to extract prompt sections by label."""
-    history_section = prompt.split("Chat History:")[1].split("Context:")[0]
-    context_section = prompt.split("Context:")[1].split("User Question:")[0]
+    history_part = prompt.split("Chat History:")[1]
+    history_section = history_part.split("Context (Documentation & Knowledge Base):")[0]
+
+    context_part = prompt.split("Context (Documentation & Knowledge Base):")[1]
+    context_section = context_part.split("User Question:")[0]
+
     question_section = prompt.split("User Question:")[1].split("Answer:")[0]

     return history_section, context_section, question_section