diff --git a/chatbot-core/api/models/schemas.py b/chatbot-core/api/models/schemas.py index 3db55e4ea..61ea82403 100644 --- a/chatbot-core/api/models/schemas.py +++ b/chatbot-core/api/models/schemas.py @@ -159,6 +159,37 @@ class MessageHistoryResponse(BaseModel): session_id: str messages: List[MessageItem] + +class SessionInfo(BaseModel): + """ + Basic metadata for a single active session. + + Fields: + session_id (str): The session identifier. + message_count (int): Number of messages exchanged in the session. + last_accessed (str): ISO-8601 timestamp of last activity. + """ + session_id: str + message_count: int + last_accessed: str + + +class SessionListResponse(BaseModel): + """ + Response model for listing all active sessions. + + Fields: + sessions (List[SessionInfo]): Ordered list of active sessions. + total (int): Total number of active sessions returned. + page (int): Current page number (1-indexed). + page_size (int): Number of sessions per page. + """ + sessions: List[SessionInfo] + total: int + page: int + page_size: int + + class QueryType(Enum): """ Enum that represents the possible query types: diff --git a/chatbot-core/api/routes/chatbot.py b/chatbot-core/api/routes/chatbot.py index 11844a971..88c691eb9 100644 --- a/chatbot-core/api/routes/chatbot.py +++ b/chatbot-core/api/routes/chatbot.py @@ -38,6 +38,8 @@ ChatResponse, DeleteResponse, MessageHistoryResponse, + SessionInfo, + SessionListResponse, SessionResponse, FileAttachment, SupportedExtensionsResponse, @@ -49,6 +51,7 @@ from api.services.memory import ( delete_session, get_session, + list_sessions, session_exists, persist_session, init_session, @@ -150,6 +153,47 @@ async def chatbot_stream(websocket: WebSocket, session_id: str): # ========================= # Session Management # ========================= +@router.get( + "/sessions", + response_model=SessionListResponse, +) +def get_sessions( + page: int = 1, + page_size: int = 20, +): + """ + List all active chat sessions. + + Returns a paginated list of currently active sessions with basic + metadata (ID, message count, and last-accessed timestamp). + + Query Parameters: + page (int): 1-indexed page number (default: 1, min: 1). + page_size (int): Sessions per page (default: 20, range: 1-100). + + Returns: + SessionListResponse: Paginated session list with total count. + """ + page = max(1, page) + page_size = max(1, min(page_size, 100)) + + result = list_sessions(page=page, page_size=page_size) + sessions = [ + SessionInfo( + session_id=s["session_id"], + message_count=s["message_count"], + last_accessed=s["last_accessed"], + ) + for s in result["sessions"] + ] + return SessionListResponse( + sessions=sessions, + total=result["total"], + page=result["page"], + page_size=result["page_size"], + ) + + @router.post( "/sessions", response_model=SessionResponse, diff --git a/chatbot-core/api/services/memory.py b/chatbot-core/api/services/memory.py index 3cb75abc2..f69fc1b0e 100644 --- a/chatbot-core/api/services/memory.py +++ b/chatbot-core/api/services/memory.py @@ -236,6 +236,50 @@ def set_last_accessed(session_id: str, timestamp: datetime) -> bool: return False +def list_sessions(page: int = 1, page_size: int = 20) -> dict: + """ + Return a paginated list of all active in-memory sessions with basic metadata. + + Each entry includes the session ID, number of messages exchanged, and the + ISO-8601 last-accessed timestamp. + + Args: + page (int): 1-indexed page number. Defaults to 1. + page_size (int): Maximum sessions per page. Defaults to 20. + + Returns: + dict: Contains ``sessions`` (list of metadata dicts), ``total`` (total + count before pagination), ``page``, and ``page_size``. + """ + with _lock: + all_ids = sorted(_sessions.keys()) + total = len(all_ids) + + start = (page - 1) * page_size + end = start + page_size + page_ids = all_ids[start:end] + + sessions = [] + for session_id in page_ids: + session_data = _sessions.get(session_id) + if session_data is None: + continue + message_count = len(session_data["memory"].chat_memory.messages) + last_accessed: datetime = session_data["last_accessed"] + sessions.append({ + "session_id": session_id, + "message_count": message_count, + "last_accessed": last_accessed.isoformat(), + }) + + return { + "sessions": sessions, + "total": total, + "page": page, + "page_size": page_size, + } + + def get_session_count() -> int: """ Get the total number of active sessions (for testing purposes). diff --git a/chatbot-core/tests/integration/test_chatbot.py b/chatbot-core/tests/integration/test_chatbot.py index 6cbbcf719..5292a4eb6 100644 --- a/chatbot-core/tests/integration/test_chatbot.py +++ b/chatbot-core/tests/integration/test_chatbot.py @@ -203,3 +203,99 @@ def get_relevant_documents_output(): "id": "docid", "chunk_text": "Relevant chunk text." }],[0.84]) + + +# ========================= +# GET /sessions integration tests +# ========================= +def test_list_sessions_empty(client): + """Should return an empty session list when no sessions have been created.""" + response = client.get("/sessions") + + assert response.status_code == 200 + data = response.json() + assert data["sessions"] == [] + assert data["total"] == 0 + assert data["page"] == 1 + assert data["page_size"] == 20 + + +def test_list_sessions_after_create(client): + """Should include a newly created session in the list.""" + create_resp = client.post("/sessions") + session_id = create_resp.json()["session_id"] + + response = client.get("/sessions") + + assert response.status_code == 200 + data = response.json() + assert data["total"] == 1 + ids = [s["session_id"] for s in data["sessions"]] + assert session_id in ids + + +def test_list_sessions_message_count(client, mock_llm_provider, mock_get_relevant_documents): + """Should report accurate message_count after exchanging messages.""" + mock_llm_provider.generate.return_value = "Hello!" + mock_get_relevant_documents.return_value = get_relevant_documents_output() + + session_id = client.post("/sessions").json()["session_id"] + client.post(f"/sessions/{session_id}/message", json={"message": "Hi"}) + + data = client.get("/sessions").json() + + session = next(s for s in data["sessions"] if s["session_id"] == session_id) + # 1 human + 1 ai message = 2 + assert session["message_count"] == 2 + + +def test_list_sessions_multiple_sessions(client): + """Should list all active sessions.""" + id_a = client.post("/sessions").json()["session_id"] + id_b = client.post("/sessions").json()["session_id"] + + data = client.get("/sessions").json() + + assert data["total"] == 2 + ids = {s["session_id"] for s in data["sessions"]} + assert id_a in ids + assert id_b in ids + + +def test_list_sessions_excludes_deleted(client): + """Should not include sessions that have been deleted.""" + keep_id = client.post("/sessions").json()["session_id"] + drop_id = client.post("/sessions").json()["session_id"] + client.delete(f"/sessions/{drop_id}") + + data = client.get("/sessions").json() + + ids = [s["session_id"] for s in data["sessions"]] + assert keep_id in ids + assert drop_id not in ids + assert data["total"] == 1 + + +def test_list_sessions_pagination(client): + """Should respect page and page_size query parameters.""" + for _ in range(3): + client.post("/sessions") + + page1 = client.get("/sessions?page=1&page_size=2").json() + page2 = client.get("/sessions?page=2&page_size=2").json() + + assert len(page1["sessions"]) == 2 + assert len(page2["sessions"]) == 1 + assert page1["total"] == 3 + assert page2["total"] == 3 + + +def test_list_sessions_response_has_last_accessed(client): + """Each session entry should include a non-empty last_accessed ISO timestamp.""" + client.post("/sessions") + + session = client.get("/sessions").json()["sessions"][0] + + assert "last_accessed" in session + assert len(session["last_accessed"]) > 0 + diff --git a/chatbot-core/tests/unit/mocks/test_env.py b/chatbot-core/tests/unit/mocks/test_env.py index 7db039f11..058cb56c3 100644 --- a/chatbot-core/tests/unit/mocks/test_env.py +++ b/chatbot-core/tests/unit/mocks/test_env.py @@ -52,6 +52,11 @@ def mock_delete_session(mocker): """Mock the delete_session function.""" return mocker.patch("api.routes.chatbot.delete_session") +@pytest.fixture +def mock_list_sessions(mocker): + """Mock the list_sessions function.""" + return mocker.patch("api.routes.chatbot.list_sessions") + @pytest.fixture def mock_get_chatbot_reply(mocker): """Mock the get_chatbot_reply function.""" diff --git a/chatbot-core/tests/unit/rag/embedding/test_bm25_indexer.py b/chatbot-core/tests/unit/rag/embedding/test_bm25_indexer.py new file mode 100644 index 000000000..3483fed6c --- /dev/null +++ b/chatbot-core/tests/unit/rag/embedding/test_bm25_indexer.py @@ -0,0 +1,183 @@ +"""Unit Tests for rag/embedding/bm25_indexer.py.""" + +import pytest +from rag.embedding.bm25_indexer import BM25Indexer + + +# ========================= +# build() tests +# ========================= +def test_build_stores_retriever_for_valid_config(mocker): + """build() should store a retriever for each config when SparseRetriever is available.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + + indexer = _make_indexer(mocker) + mock_indexed = mocker.Mock() + mock_sr.return_value.index_file.return_value = mock_indexed + + indexer.build() + + assert "test_index" in indexer.retrievers + assert indexer.retrievers["test_index"] == mock_indexed + + +def test_build_stores_multiple_retrievers(mocker): + """build() should store retrievers for all valid configs.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + + configs = [ + {"index_name": "idx_a", "file_path": "a.jsonl"}, + {"index_name": "idx_b", "file_path": "b.jsonl"}, + ] + indexer = BM25Indexer(index_configs=configs, logger=mocker.Mock()) + mock_sr.return_value.index_file.return_value = mocker.Mock() + + indexer.build() + + assert "idx_a" in indexer.retrievers + assert "idx_b" in indexer.retrievers + + +def test_build_skips_config_when_sparse_retriever_unavailable(mocker): + """build() should not populate retrievers when SparseRetriever is None.""" + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", None) + + indexer = _make_indexer(mocker) + indexer.build() + + assert indexer.retrievers == {} + + +def test_build_skips_config_when_index_config_raises(mocker): + """build() should skip a config whose _index_config returns None due to an error.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + mock_sr.return_value.index_file.side_effect = RuntimeError("disk error") + + indexer = _make_indexer(mocker) + indexer.build() + + assert indexer.retrievers == {} + + +# ========================= +# _index_config() tests +# ========================= +def test_index_config_returns_none_when_sparse_retriever_is_none(mocker): + """_index_config() should return None immediately when SparseRetriever is None.""" + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", None) + + indexer = _make_indexer(mocker) + result = indexer._index_config({"index_name": "idx", "file_path": "f.jsonl"}) + + assert result is None + + +def test_index_config_returns_retriever_on_success(mocker): + """_index_config() should return the indexed SparseRetriever on success.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + mock_indexed = mocker.Mock() + mock_sr.return_value.index_file.return_value = mock_indexed + + indexer = _make_indexer(mocker) + result = indexer._index_config({"index_name": "idx", "file_path": "f.jsonl"}) + + assert result == mock_indexed + + +def test_index_config_handles_indexing_error_gracefully(mocker): + """_index_config() should catch exceptions, log the error, and return None.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + mock_sr.return_value.index_file.side_effect = Exception("index failed") + + mock_logger = mocker.Mock() + indexer = BM25Indexer(index_configs=[], logger=mock_logger) + result = indexer._index_config({"index_name": "idx", "file_path": "f.jsonl"}) + + assert result is None + mock_logger.error.assert_called_once() + assert "idx" in mock_logger.error.call_args[0][1] + + +# ========================= +# get() tests +# ========================= +def test_get_returns_cached_retriever(mocker): + """get() should return the in-memory retriever without hitting disk.""" + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mocker.Mock()) + + indexer = _make_indexer(mocker) + cached = mocker.Mock() + indexer.retrievers["test_index"] = cached + + result = indexer.get("test_index") + + assert result is cached + + +def test_get_loads_retriever_from_disk_when_not_cached(mocker): + """get() should load from disk and cache the result when not already in memory.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + loaded = mocker.Mock() + mock_sr.load.return_value = loaded + + indexer = _make_indexer(mocker) + result = indexer.get("test_index") + + assert result is loaded + assert indexer.retrievers["test_index"] is loaded + mock_sr.load.assert_called_once_with("test_index") + + +def test_get_returns_none_when_load_fails(mocker): + """get() should return None and log a warning when disk load raises an exception.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + mock_sr.load.side_effect = Exception("not found") + + mock_logger = mocker.Mock() + indexer = BM25Indexer(index_configs=[], logger=mock_logger) + result = indexer.get("missing_index") + + assert result is None + mock_logger.warning.assert_called_once() + assert "missing_index" in mock_logger.warning.call_args[0][1] + + +def test_get_returns_none_when_sparse_retriever_unavailable(mocker): + """get() should return None immediately when SparseRetriever is None.""" + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", None) + + indexer = _make_indexer(mocker) + result = indexer.get("test_index") + + assert result is None + + +def test_get_does_not_call_load_when_retriever_cached(mocker): + """get() should not call SparseRetriever.load when the retriever is already cached.""" + mock_sr = mocker.Mock() + mocker.patch("rag.embedding.bm25_indexer.SparseRetriever", mock_sr) + + indexer = _make_indexer(mocker) + indexer.retrievers["test_index"] = mocker.Mock() + + indexer.get("test_index") + + mock_sr.load.assert_not_called() + + +# ========================= +# Helpers +# ========================= +def _make_indexer(mocker): + """Return a BM25Indexer with a single test config and a mock logger.""" + return BM25Indexer( + index_configs=[{"index_name": "test_index", "file_path": "test.jsonl"}], + logger=mocker.Mock(), + ) diff --git a/chatbot-core/tests/unit/routes/test_chatbot.py b/chatbot-core/tests/unit/routes/test_chatbot.py index 7fa5dc4a6..8f362d72d 100644 --- a/chatbot-core/tests/unit/routes/test_chatbot.py +++ b/chatbot-core/tests/unit/routes/test_chatbot.py @@ -64,3 +64,102 @@ def test_delete_chat_not_found(client, mock_delete_session): assert response.status_code == 404 assert response.json() == {"detail": "Session not found."} + + +# ========================= +# GET /sessions tests +# ========================= +def _make_session_payload(session_id="abc-123", message_count=4): + """Build the dict that list_sessions returns for a single session.""" + return { + "sessions": [ + { + "session_id": session_id, + "message_count": message_count, + "last_accessed": "2026-01-01T00:00:00", + } + ], + "total": 1, + "page": 1, + "page_size": 20, + } + + +def test_get_sessions_empty(client, mock_list_sessions): + """GET /sessions returns an empty list when no sessions exist.""" + mock_list_sessions.return_value = { + "sessions": [], + "total": 0, + "page": 1, + "page_size": 20, + } + + response = client.get("/sessions") + + assert response.status_code == 200 + data = response.json() + assert data["sessions"] == [] + assert data["total"] == 0 + assert data["page"] == 1 + assert data["page_size"] == 20 + + +def test_get_sessions_with_sessions(client, mock_list_sessions): + """GET /sessions returns session metadata when sessions exist.""" + mock_list_sessions.return_value = _make_session_payload() + + response = client.get("/sessions") + + assert response.status_code == 200 + data = response.json() + assert data["total"] == 1 + assert len(data["sessions"]) == 1 + session = data["sessions"][0] + assert session["session_id"] == "abc-123" + assert session["message_count"] == 4 + assert "last_accessed" in session + + +def test_get_sessions_default_pagination(client, mock_list_sessions): + """GET /sessions calls list_sessions with default page=1 and page_size=20.""" + mock_list_sessions.return_value = { + "sessions": [], "total": 0, "page": 1, "page_size": 20, + } + + client.get("/sessions") + + mock_list_sessions.assert_called_once_with(page=1, page_size=20) + + +def test_get_sessions_custom_pagination(client, mock_list_sessions): + """GET /sessions forwards custom page and page_size query params.""" + mock_list_sessions.return_value = { + "sessions": [], "total": 0, "page": 2, "page_size": 5, + } + + client.get("/sessions?page=2&page_size=5") + + mock_list_sessions.assert_called_once_with(page=2, page_size=5) + + +def test_get_sessions_clamps_page_size_to_100(client, mock_list_sessions): + """GET /sessions clamps page_size > 100 down to 100.""" + mock_list_sessions.return_value = { + "sessions": [], "total": 0, "page": 1, "page_size": 100, + } + + client.get("/sessions?page_size=9999") + + mock_list_sessions.assert_called_once_with(page=1, page_size=100) + + +def test_get_sessions_clamps_page_to_minimum_1(client, mock_list_sessions): + """GET /sessions clamps page < 1 up to 1.""" + mock_list_sessions.return_value = { + "sessions": [], "total": 0, "page": 1, "page_size": 20, + } + + client.get("/sessions?page=-5") + + mock_list_sessions.assert_called_once_with(page=1, page_size=20) +