Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions chatbot-core/api/config/config-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ retrieval:
top_k_docs: 6
top_k_plugins: 5
top_k_discourse: 5
top_k_stackoverflow: 5 #-> second change
top_k_semantic: 20
top_k_keyword: 20
semantic_threshold: 1.5
Expand All @@ -35,6 +36,7 @@ tool_names:
plugins: "plugins"
jenkins_docs: "docs"
community_threads: "discourse"
stackoverflow: "stackoverflow" #-> first change
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the first, second, ...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the review @berviantoleo!
I checked PR #172. The key difference in my PR is that it also standardizes the tool interface by adding keywords and logger parameters to match the contract of the other 3 search tools (search_jenkins_docs, search_plugin_docs, search_community_threads).
PR #172 keeps search_stackoverflow_threads(query) only, which leaves the interface inconsistency unfixed.
I will remove the inline comment (#-> first change) from both config files as requested. Should I also address the interface standardization separately or keep it in this PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to comment on existing PR, suggest the what you found that the contributor will miss. It's another way to contribute. Contribution doesn't mean you need to be an author; as a pair-reviewer, it's also valuable.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood! I will go to PR #172 and leave a review comment there pointing out the interface inconsistency.
Thank you for the guidance @berviantoleo!


cors:
allowed_origins:
Expand Down
2 changes: 2 additions & 0 deletions chatbot-core/api/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ retrieval:
top_k_docs: 6
top_k_plugins: 5
top_k_discourse: 5
top_k_stackoverflow: 5 #-> second change
top_k_semantic: 20
top_k_keyword: 20
semantic_threshold: 1.5
Expand All @@ -35,6 +36,7 @@ tool_names:
plugins: "plugins"
jenkins_docs: "docs"
community_threads: "discourse"
stackoverflow: "stackoverflow" #-> first change

cors:
allowed_origins:
Expand Down
36 changes: 30 additions & 6 deletions chatbot-core/api/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,39 @@ def search_jenkins_docs(query: str, keywords: str, logger) -> str:
top_k=retrieval_config["top_k_docs"],
logger=logger
)

def search_stackoverflow_threads(query: str) -> str:
#-> third change
def search_stackoverflow_threads(query: str, keywords: str, logger) -> str:
"""
Stackoverflow Search tool
Search tool for Stack Overflow threads. Exploits both a sparse and dense
search, resulting in a hybrid search.

Args:
query (str): The user query.
keywords (str): Keywords extracted from the user query.
logger: Logger object.

Returns:
str: The result of the Stack Overflow search tool.
"""
if query:
pass
return "Nothing relevant"
source_name = CONFIG["tool_names"]["stackoverflow"]
data_retrieved_semantic, scores_semantic, data_retrieved_keyword, scores_keyword = (
retrieve_documents(
query=query,
keywords=keywords,
logger=logger,
source_name=source_name,
embedding_model=EMBEDDING_MODEL
)
)

return extract_top_chunks(
data_retrieved_semantic,
scores_semantic,
data_retrieved_keyword,
scores_keyword,
top_k=retrieval_config["top_k_stackoverflow"],
logger=logger
)
def search_community_threads(query: str, keywords: str, logger) -> str:
"""
Search tool for the community discourse threads. Exploits both a sparse and
Expand Down
8 changes: 5 additions & 3 deletions chatbot-core/api/tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
TOOL_SIGNATURES = MappingProxyType({
"search_plugin_docs": {"plugin_name": str, "query": str},
"search_jenkins_docs": {"query": str},
"search_stackoverflow_threads": {"query": str},
"search_stackoverflow_threads": {"query": str, "keywords": str}, #-> fourth change
"search_community_threads": {"query": str},
})

Expand Down Expand Up @@ -48,10 +48,12 @@ def get_default_tools_call(query: str):
"query": query
}
},
# -> fifth change
{
"tool": "search_stackoverflow_threads",
"tool": "search_stackoverflow_threads",
"params": {
"query": query
"query": query,
"keywords": query
}
},
{
Expand Down
70 changes: 70 additions & 0 deletions chatbot-core/tests/unit/tools/test_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Unit tests for the tools module."""
import unittest
from unittest.mock import MagicMock, patch


class TestSearchStackoverflowThreads(unittest.TestCase):
"""Test suite for search_stackoverflow_threads tool."""

def _make_logger(self):
return MagicMock()

@patch("api.tools.tools.retrieve_documents")
@patch("api.tools.tools.extract_top_chunks")
def test_returns_extract_top_chunks_result(self, mock_extract, mock_retrieve):
"""Test that the function returns the result of extract_top_chunks."""
mock_retrieve.return_value = ([], [], [], [])
mock_extract.return_value = "some result"

from api.tools.tools import search_stackoverflow_threads
result = search_stackoverflow_threads("how to use jenkins", "jenkins", self._make_logger())

self.assertEqual(result, "some result")

@patch("api.tools.tools.retrieve_documents")
@patch("api.tools.tools.extract_top_chunks")
def test_calls_retrieve_documents_with_correct_args(self, mock_extract, mock_retrieve):
"""Test that retrieve_documents is called with query and keywords."""
mock_retrieve.return_value = ([], [], [], [])
mock_extract.return_value = ""
logger = self._make_logger()

from api.tools.tools import search_stackoverflow_threads
search_stackoverflow_threads("pipeline error", "pipeline", logger)

mock_retrieve.assert_called_once()
call_kwargs = mock_retrieve.call_args.kwargs
self.assertEqual(call_kwargs["query"], "pipeline error")
self.assertEqual(call_kwargs["keywords"], "pipeline")

@patch("api.tools.tools.retrieve_documents")
@patch("api.tools.tools.extract_top_chunks")
def test_does_not_return_hardcoded_nothing_relevant(self, mock_extract, mock_retrieve):
"""Test that the stub behaviour is gone — never returns hardcoded 'Nothing relevant'."""
mock_retrieve.return_value = ([], [], [], [])
mock_extract.return_value = "No context available."

from api.tools.tools import search_stackoverflow_threads
result = search_stackoverflow_threads("any query", "any", self._make_logger())

self.assertNotEqual(result, "Nothing relevant")


class TestToolSignatures(unittest.TestCase):
"""Test that all tools have consistent signatures."""

def test_stackoverflow_has_keywords_in_signature(self):
"""Test that TOOL_SIGNATURES includes keywords for stackoverflow."""
from api.tools.utils import TOOL_SIGNATURES
self.assertIn("keywords", TOOL_SIGNATURES["search_stackoverflow_threads"])

def test_default_tool_calls_include_keywords_for_stackoverflow(self):
"""Test that get_default_tools_call includes keywords for stackoverflow."""
from api.tools.utils import get_default_tools_call
calls = get_default_tools_call("test query")
so_call = next(c for c in calls if c["tool"] == "search_stackoverflow_threads")
self.assertIn("keywords", so_call["params"])


if __name__ == "__main__":
unittest.main()