Skip to content

Commit 48fd5d6

Browse files
author
kaneliu120
committed
fix: stabilize api compatibility, health checks, and test/perf pipeline
1 parent 20547ab commit 48fd5d6

24 files changed

Lines changed: 685 additions & 282 deletions

backend/app/api/v1/analytics.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
@router.get("/overview")
1919
async def get_overview(
20-
period: str = Query(default="7d", regex="^(7d|30d|90d)$"),
20+
period: str = Query(default="7d", pattern="^(7d|30d|90d)$"),
2121
current_user: User = Depends(get_current_user),
2222
) -> Dict[str, Any]:
2323
start = datetime.utcnow()
@@ -95,10 +95,10 @@ async def get_document_stats(
9595

9696
@router.get("/search-trends")
9797
async def get_search_trends(
98-
period: str = Query(default="7d", regex="^(7d|30d)$"),
98+
period: str = Query(default="7d", pattern="^(7d|30d|90d)$"),
9999
current_user: User = Depends(get_current_user),
100100
) -> Dict[str, Any]:
101-
period_days_map = {"7d": 7, "30d": 30}
101+
period_days_map = {"7d": 7, "30d": 30, "90d": 90}
102102
days = period_days_map[period]
103103
since = datetime.utcnow() - timedelta(days=days)
104104

backend/app/api/v1/knowledge.py

Lines changed: 237 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,84 @@
11
"""知识管理API"""
22
from fastapi import APIRouter, Depends, HTTPException, Request, status, Query
33
from sqlalchemy.ext.asyncio import AsyncSession
4-
from sqlalchemy import select, func
5-
from typing import List, Optional
4+
from sqlalchemy import select, func, or_
5+
from typing import List, Optional, Dict, Any
66
from datetime import datetime
7+
from pydantic import BaseModel, Field
78

8-
from app.core.database import get_db, KnowledgeDocument
9+
from app.core.database import get_db, KnowledgeDocument, SearchHistory
910
from app.core.auth import get_current_user
1011
from app.core.rag_engine import RAGEngine
1112
from app.utils.validation import (
1213
validate_search_query, validate_document_content,
13-
validate_metadata, InputValidator
14+
validate_metadata
1415
)
1516
from app.models.knowledge import (
1617
DocumentCreate, DocumentUpdate, DocumentResponse,
17-
DocumentListResponse, DocumentSearchRequest
18+
DocumentListResponse
1819
)
1920
from app.models.user import User
2021
from app.utils.error_handlers import handle_internal_error
2122

2223
router = APIRouter()
2324

2425

26+
class KnowledgeSearchRequest(BaseModel):
    """Request body accepted by the knowledge search endpoint.

    Length and range bounds declared here are enforced by pydantic when the
    body is parsed.
    """

    # Free-text search phrase, 1 to 1000 characters.
    query: str = Field(..., min_length=1, max_length=1000)

    # Maximum number of results to return, between 1 and 100.
    limit: int = Field(10, ge=1, le=100)

    # Requested retrieval strategy name; any string is accepted by the model.
    strategy: str = "semantic"

    # Optional list of source types to restrict the search to.
    source_types: Optional[List[str]] = Field(default=None)

    # Optional free-form filter mapping supplied by the client.
    filters: Optional[Dict[str, Any]] = Field(default=None)
32+
33+
34+
def _document_to_search_result(doc: KnowledgeDocument, score: float = 0.5, search_type: str = "keyword") -> Dict[str, Any]:
35+
updated_at = doc.updated_at.isoformat() if doc.updated_at else ""
36+
created_at = doc.created_at.isoformat() if doc.created_at else ""
37+
return {
38+
"id": str(doc.id),
39+
"title": doc.title,
40+
"content": (doc.content or "")[:500],
41+
"type": doc.source_type or "unknown",
42+
"source": doc.source_path or doc.source_type or "unknown",
43+
"size": f"{len((doc.content or '').encode('utf-8'))} B",
44+
"views": 0,
45+
"starred": False,
46+
"tags": doc.tags or [],
47+
"created_at": created_at,
48+
"updated_at": updated_at,
49+
"updated": updated_at,
50+
"score": float(max(0.0, min(1.0, score))),
51+
"search_type": search_type,
52+
}
53+
54+
55+
def _to_document_response(doc: KnowledgeDocument) -> DocumentResponse:
    """Build the API response model from a ``KnowledgeDocument`` row.

    The ORM attribute ``document_metadata`` is exposed under the response
    field name ``metadata``. Falsy metadata and tags are normalised to ``{}``
    and ``[]`` respectively.
    """
    # Attributes whose names are identical on the ORM row and the response.
    passthrough = (
        "id",
        "title",
        "content",
        "source_type",
        "source_path",
        "chunk_index",
        "total_chunks",
        "category",
        "embedding_id",
        "created_at",
        "updated_at",
        "is_active",
    )
    payload = {name: getattr(doc, name) for name in passthrough}
    payload["metadata"] = doc.document_metadata or {}
    payload["tags"] = doc.tags or []
    return DocumentResponse(**payload)
73+
74+
2575
@router.get("/documents", response_model=DocumentListResponse)
2676
async def list_documents(
2777
request: Request,
2878
skip: int = Query(default=0, ge=0, le=10000),
2979
limit: int = Query(default=100, ge=1, le=1000),
80+
page: Optional[int] = Query(default=None, ge=1),
81+
sort: str = Query(default="updated"),
3082
source_type: Optional[str] = None,
3183
is_active: Optional[bool] = True,
3284
tags: Optional[str] = None,
@@ -36,6 +88,9 @@ async def list_documents(
3688
):
3789
"""获取文档列表"""
3890
try:
91+
if page is not None and skip == 0:
92+
skip = (page - 1) * limit
93+
3994
query = select(KnowledgeDocument)
4095

4196
if source_type:
@@ -47,7 +102,15 @@ async def list_documents(
47102
if category:
48103
query = query.where(KnowledgeDocument.category == category)
49104

50-
query = query.offset(skip).limit(limit).order_by(KnowledgeDocument.updated_at.desc())
105+
sort_key = (sort or "updated").lower()
106+
if sort_key == "created":
107+
order_by = KnowledgeDocument.created_at.desc()
108+
elif sort_key == "title":
109+
order_by = KnowledgeDocument.title.asc()
110+
else:
111+
order_by = KnowledgeDocument.updated_at.desc()
112+
113+
query = query.offset(skip).limit(limit).order_by(order_by)
51114

52115
result = await db.execute(query)
53116
documents = result.scalars().all()
@@ -78,7 +141,7 @@ async def list_documents(
78141
total = len(documents)
79142

80143
return DocumentListResponse(
81-
documents=[DocumentResponse.from_orm(doc) for doc in documents],
144+
documents=[_to_document_response(doc) for doc in documents],
82145
total=total,
83146
skip=skip,
84147
limit=limit
@@ -88,6 +151,164 @@ async def list_documents(
88151
raise handle_internal_error("Document listing", e)
89152

90153

154+
@router.get("/stats")
async def get_knowledge_stats(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Knowledge-base statistics (kept compatible with the old frontend and test scripts).

    Returns:
        A dict with ``total_documents`` (active documents), ``total_searches``
        (rows recorded in ``SearchHistory``), ``today_imports`` (active
        documents created since 00:00 of the current naive-UTC day),
        ``vector_store_size``, a fixed ``status`` of ``"healthy"`` and
        ``took_ms`` (handler duration in milliseconds).

    Raises:
        Whatever ``handle_internal_error`` produces when a query fails.
    """
    try:
        start = datetime.utcnow()
        # Derive the midnight cutoff from the same clock reading as `start`.
        since = start.replace(hour=0, minute=0, second=0, microsecond=0)

        doc_result = await db.execute(
            select(
                func.count().label("total"),
                func.count().filter(KnowledgeDocument.created_at >= since).label("today_imports"),
            ).where(KnowledgeDocument.is_active == True)  # noqa: E712 - SQL expression, not a Python bool test
        )
        row = doc_result.one()

        # Report the real number of recorded searches instead of a constant 0.
        search_result = await db.execute(
            select(func.count()).select_from(SearchHistory)
        )
        total_searches = search_result.scalar_one()

        took_ms = (datetime.utcnow() - start).total_seconds() * 1000
        return {
            "total_documents": row.total or 0,
            "total_searches": total_searches or 0,
            "today_imports": row.today_imports or 0,
            "vector_store_size": f"{row.total or 0} docs",
            "status": "healthy",
            "took_ms": round(took_ms, 2),
        }
    except Exception as e:
        raise handle_internal_error("Knowledge stats", e)
183+
184+
185+
@router.get("/search-modes")
async def get_search_modes(current_user: User = Depends(get_current_user)):
    """Return the available search modes (compatibility endpoint for the frontend settings page).

    The payload is static: a ``modes`` list of ``id`` / ``name`` /
    ``description`` entries and the ``default`` mode id.
    """
    # (id, display name, description) for each advertised mode, in display order.
    catalogue = (
        ("semantic", "Semantic", "Embedding-based semantic retrieval"),
        ("keyword", "Keyword", "Keyword matching over titles and content"),
        ("hybrid", "Hybrid", "Semantic + keyword blended ranking"),
    )
    modes = [
        {"id": mode_id, "name": label, "description": summary}
        for mode_id, label, summary in catalogue
    ]
    return {"modes": modes, "default": "semantic"}
196+
197+
198+
def _escape_like(term: str, escape: str = "/") -> str:
    """Escape LIKE wildcards in ``term`` so user text is matched literally."""
    return (
        term.replace(escape, escape + escape)
        .replace("%", escape + "%")
        .replace("_", escape + "_")
    )


def _keep_best_scored(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collapse entries sharing an ``id``, keeping the highest-scoring one."""
    best: Dict[str, Dict[str, Any]] = {}
    for item in items:
        current = best.get(item["id"])
        if current is None or item["score"] > current["score"]:
            best[item["id"]] = item
    return list(best.values())


@router.post("/search")
async def search_documents(
    search_request: KnowledgeSearchRequest,
    request: Request,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Search documents (compatible with the frontend `/api/v1/knowledge/search` route).

    Strategy handling:
        * ``semantic``: RAG hits when the engine is available and returns
          matches; otherwise falls back to keyword matching.
        * ``hybrid``: RAG hits merged with keyword matches, keeping the best
          score per document; keyword only when RAG yields nothing.
        * ``keyword`` or any other value: case-insensitive substring match on
          title and content.

    Source types come from ``source_types`` or, when that is empty, from
    ``filters["source_types"]``. Each request is recorded in ``SearchHistory``
    on a best-effort basis: a failure to store it is logged and does not fail
    the search.

    Returns:
        A dict with ``query``, ``results`` (at most ``limit`` entries sorted
        by descending score), ``total``, ``search_type`` (the strategy that
        actually produced the results) and ``total_took_ms``.

    Raises:
        HTTPException: Propagated unchanged when raised while handling the
            request (for example by query validation).
        Whatever ``handle_internal_error`` produces for any other failure.
    """
    # Local import: the module-level imports are not fully visible from this block.
    import logging

    log = logging.getLogger(__name__)

    try:
        start = datetime.utcnow()
        query_text = validate_search_query(search_request.query)
        strategy = (search_request.strategy or "semantic").lower()
        limit = search_request.limit

        source_types = search_request.source_types or []
        if not source_types and isinstance(search_request.filters, dict):
            nested_types = search_request.filters.get("source_types")
            if isinstance(nested_types, list):
                source_types = [str(v) for v in nested_types if v]

        base_query = select(KnowledgeDocument).where(KnowledgeDocument.is_active == True)  # noqa: E712 - SQL expression
        if source_types:
            base_query = base_query.where(KnowledgeDocument.source_type.in_(source_types))

        # 1) Semantic / hybrid path through RAG
        semantic_items: List[Dict[str, Any]] = []
        rag_engine: Optional[RAGEngine] = getattr(request.app.state, "rag_engine", None)
        rag_usable = bool(getattr(request.app.state, "rag_available", False) and rag_engine is not None)

        if strategy in {"semantic", "hybrid"} and rag_usable:
            try:
                # Over-fetch so hits filtered out below still leave enough results.
                rag_hits = await rag_engine.search(query=query_text, top_k=min(100, limit * 2))
                scored_ids = [
                    (int(hit.document_id), hit.score)
                    for hit in (rag_hits or [])
                    if str(hit.document_id).isdigit()
                ]
                if scored_ids:
                    unique_ids = list(dict.fromkeys(doc_id for doc_id, _ in scored_ids))
                    db_rows = await db.execute(
                        base_query.where(KnowledgeDocument.id.in_(unique_ids))
                    )
                    doc_map = {doc.id: doc for doc in db_rows.scalars().all()}
                    # Several hits may point at the same document; keep the best one.
                    semantic_items = _keep_best_scored(
                        [
                            _document_to_search_result(doc_map[doc_id], score=score, search_type="semantic")
                            for doc_id, score in scored_ids
                            if doc_id in doc_map
                        ]
                    )
            except Exception:
                # Graceful fallback to keyword search below, but leave a trace.
                log.warning("Semantic search failed; falling back to keyword search", exc_info=True)
                semantic_items = []

        results = semantic_items
        used_strategy = "semantic" if semantic_items else "keyword"

        # 2) Keyword path: explicit keyword search, the keyword half of hybrid,
        #    or the fallback when the semantic path produced nothing.
        if not semantic_items or strategy in {"keyword", "hybrid"}:
            pattern = f"%{_escape_like(query_text)}%"
            kq = (
                base_query.where(
                    or_(
                        KnowledgeDocument.title.ilike(pattern, escape="/"),
                        KnowledgeDocument.content.ilike(pattern, escape="/"),
                    )
                )
                .order_by(KnowledgeDocument.updated_at.desc())
                .limit(limit * 3)
            )
            kw_result = await db.execute(kq)
            needle = query_text.lower()
            kw_items = [
                _document_to_search_result(
                    doc,
                    # Title matches rank above content-only matches.
                    score=0.9 if needle in (doc.title or "").lower() else 0.75,
                    search_type="keyword",
                )
                for doc in kw_result.scalars().all()
            ]

            if strategy == "hybrid" and semantic_items:
                # Merge semantic + keyword by max score
                results = _keep_best_scored(semantic_items + kw_items)
                used_strategy = "hybrid"
            else:
                results = kw_items
                used_strategy = "keyword"

        # Sort and cap
        results.sort(key=lambda item: item.get("score", 0), reverse=True)
        results = results[:limit]
        matched_count = len(results)

        # Save history for analytics. Commit explicitly, as the other write
        # endpoints in this module do; never let bookkeeping fail the search.
        try:
            db.add(
                SearchHistory(
                    query=query_text,
                    results_count=matched_count,
                    user_id=str(getattr(current_user, "username", "unknown")),
                    session_id=request.headers.get("X-Request-ID"),
                )
            )
            await db.commit()
        except Exception:
            await db.rollback()
            log.warning("Could not record search history", exc_info=True)

        took_ms = (datetime.utcnow() - start).total_seconds() * 1000
        return {
            "query": query_text,
            "results": results,
            "total": matched_count,
            "search_type": used_strategy,
            "total_took_ms": round(took_ms, 2),
        }
    except HTTPException:
        # Keep client-facing errors intact instead of rewrapping them.
        raise
    except Exception as e:
        raise handle_internal_error("Knowledge search", e)
310+
311+
91312
@router.get("/documents/{document_id}", response_model=DocumentResponse)
92313
async def get_document(
93314
document_id: int,
@@ -109,7 +330,7 @@ async def get_document(
109330
detail=f"Document with id {document_id} not found"
110331
)
111332

112-
return DocumentResponse.from_orm(document)
333+
return _to_document_response(document)
113334

114335
except HTTPException:
115336
raise
@@ -137,7 +358,6 @@ async def get_related_documents(
137358
detail=f"Document with id {document_id} not found"
138359
)
139360

140-
from sqlalchemy import or_
141361
related_query = (
142362
select(KnowledgeDocument)
143363
.where(KnowledgeDocument.id != document_id)
@@ -154,7 +374,7 @@ async def get_related_documents(
154374
related_result = await db.execute(related_query)
155375
related_docs = related_result.scalars().all()
156376

157-
return [DocumentResponse.from_orm(doc) for doc in related_docs]
377+
return [_to_document_response(doc) for doc in related_docs]
158378

159379
except HTTPException:
160380
raise
@@ -211,7 +431,7 @@ async def create_document(
211431
raise handle_internal_error("RAG indexing", rag_err)
212432

213433
await db.commit()
214-
return DocumentResponse.from_orm(document)
434+
return _to_document_response(document)
215435

216436
except HTTPException:
217437
raise
@@ -246,7 +466,10 @@ async def update_document(
246466
update_data = document_data.dict(exclude_unset=True)
247467
for field, value in update_data.items():
248468
if field in ALLOWED_UPDATE_FIELDS:
249-
setattr(document, field, value)
469+
if field == "metadata":
470+
document.document_metadata = value
471+
else:
472+
setattr(document, field, value)
250473

251474
document.updated_at = datetime.utcnow()
252475

@@ -267,7 +490,7 @@ async def update_document(
267490

268491
await db.commit()
269492
await db.refresh(document)
270-
return DocumentResponse.from_orm(document)
493+
return _to_document_response(document)
271494

272495
except HTTPException:
273496
raise
@@ -391,7 +614,7 @@ async def upload_document(
391614

392615
await db.commit()
393616
await db.refresh(document)
394-
return DocumentResponse.from_orm(document)
617+
return _to_document_response(document)
395618

396619
except HTTPException:
397620
raise

0 commit comments

Comments
 (0)