11"""知识管理API"""
22from fastapi import APIRouter , Depends , HTTPException , Request , status , Query
33from sqlalchemy .ext .asyncio import AsyncSession
4- from sqlalchemy import select , func
5- from typing import List , Optional
4+ from sqlalchemy import select , func , or_
5+ from typing import List , Optional , Dict , Any
66from datetime import datetime
7+ from pydantic import BaseModel , Field
78
8- from app .core .database import get_db , KnowledgeDocument
9+ from app .core .database import get_db , KnowledgeDocument , SearchHistory
910from app .core .auth import get_current_user
1011from app .core .rag_engine import RAGEngine
1112from app .utils .validation import (
1213 validate_search_query , validate_document_content ,
13- validate_metadata , InputValidator
14+ validate_metadata
1415)
1516from app .models .knowledge import (
1617 DocumentCreate , DocumentUpdate , DocumentResponse ,
17- DocumentListResponse , DocumentSearchRequest
18+ DocumentListResponse
1819)
1920from app .models .user import User
2021from app .utils .error_handlers import handle_internal_error
2122
2223router = APIRouter ()
2324
2425
class KnowledgeSearchRequest(BaseModel):
    # Request body accepted by the knowledge search endpoint.
    # (Documented with comments rather than a class docstring so the
    # generated schema description stays exactly as it was.)

    # Free-text query, 1 to 1000 characters.
    query: str = Field(..., min_length=1, max_length=1000)

    # Maximum number of results to return, between 1 and 100.
    limit: int = Field(10, ge=1, le=100)

    # Requested search strategy name; not restricted to a fixed set here.
    strategy: str = Field("semantic")

    # Optional list of source types to restrict the search to.
    source_types: Optional[List[str]] = Field(default=None)

    # Optional free-form filter mapping.
    filters: Optional[Dict[str, Any]] = Field(default=None)
32+
33+
34+ def _document_to_search_result (doc : KnowledgeDocument , score : float = 0.5 , search_type : str = "keyword" ) -> Dict [str , Any ]:
35+ updated_at = doc .updated_at .isoformat () if doc .updated_at else ""
36+ created_at = doc .created_at .isoformat () if doc .created_at else ""
37+ return {
38+ "id" : str (doc .id ),
39+ "title" : doc .title ,
40+ "content" : (doc .content or "" )[:500 ],
41+ "type" : doc .source_type or "unknown" ,
42+ "source" : doc .source_path or doc .source_type or "unknown" ,
43+ "size" : f"{ len ((doc .content or '' ).encode ('utf-8' ))} B" ,
44+ "views" : 0 ,
45+ "starred" : False ,
46+ "tags" : doc .tags or [],
47+ "created_at" : created_at ,
48+ "updated_at" : updated_at ,
49+ "updated" : updated_at ,
50+ "score" : float (max (0.0 , min (1.0 , score ))),
51+ "search_type" : search_type ,
52+ }
53+
54+
def _to_document_response(doc: KnowledgeDocument) -> DocumentResponse:
    """Build the API response model from a knowledge document entity.

    The entity's ``document_metadata`` attribute is exposed under the
    response field name ``metadata``; missing metadata and tags are
    replaced by an empty dict and an empty list respectively.
    """
    # Attributes whose name is identical on the entity and on the response.
    same_named_fields = (
        "id",
        "title",
        "content",
        "source_type",
        "source_path",
        "chunk_index",
        "total_chunks",
        "category",
        "embedding_id",
        "created_at",
        "updated_at",
        "is_active",
    )
    payload = {name: getattr(doc, name) for name in same_named_fields}

    # Fields that need renaming or a fallback value.
    payload["metadata"] = doc.document_metadata or {}
    payload["tags"] = doc.tags or []

    return DocumentResponse(**payload)
73+
74+
2575@router .get ("/documents" , response_model = DocumentListResponse )
2676async def list_documents (
2777 request : Request ,
2878 skip : int = Query (default = 0 , ge = 0 , le = 10000 ),
2979 limit : int = Query (default = 100 , ge = 1 , le = 1000 ),
80+ page : Optional [int ] = Query (default = None , ge = 1 ),
81+ sort : str = Query (default = "updated" ),
3082 source_type : Optional [str ] = None ,
3183 is_active : Optional [bool ] = True ,
3284 tags : Optional [str ] = None ,
@@ -36,6 +88,9 @@ async def list_documents(
3688):
3789 """获取文档列表"""
3890 try :
91+ if page is not None and skip == 0 :
92+ skip = (page - 1 ) * limit
93+
3994 query = select (KnowledgeDocument )
4095
4196 if source_type :
@@ -47,7 +102,15 @@ async def list_documents(
47102 if category :
48103 query = query .where (KnowledgeDocument .category == category )
49104
50- query = query .offset (skip ).limit (limit ).order_by (KnowledgeDocument .updated_at .desc ())
105+ sort_key = (sort or "updated" ).lower ()
106+ if sort_key == "created" :
107+ order_by = KnowledgeDocument .created_at .desc ()
108+ elif sort_key == "title" :
109+ order_by = KnowledgeDocument .title .asc ()
110+ else :
111+ order_by = KnowledgeDocument .updated_at .desc ()
112+
113+ query = query .offset (skip ).limit (limit ).order_by (order_by )
51114
52115 result = await db .execute (query )
53116 documents = result .scalars ().all ()
@@ -78,7 +141,7 @@ async def list_documents(
78141 total = len (documents )
79142
80143 return DocumentListResponse (
81- documents = [DocumentResponse . from_orm (doc ) for doc in documents ],
144+ documents = [_to_document_response (doc ) for doc in documents ],
82145 total = total ,
83146 skip = skip ,
84147 limit = limit
@@ -88,6 +151,164 @@ async def list_documents(
88151 raise handle_internal_error ("Document listing" , e )
89152
90153
@router.get("/stats")
async def get_knowledge_stats(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Knowledge base statistics (kept for the legacy frontend and test scripts).

    Counts active documents, and how many of them were created since
    00:00 UTC today, in a single aggregate query.

    Returns:
        A dict with ``total_documents``, ``total_searches``,
        ``today_imports``, ``vector_store_size``, ``status`` and ``took_ms``.

    Raises:
        Whatever ``handle_internal_error`` produces for an unexpected failure.
    """
    import time

    try:
        # perf_counter is monotonic, so the reported duration cannot go
        # negative or jump when the system clock is adjusted.
        started = time.perf_counter()

        # Naive UTC midnight, matching the naive utcnow() timestamps this
        # module writes elsewhere (e.g. when updating documents).
        midnight_utc = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0
        )

        stats_query = select(
            func.count().label("total"),
            func.count()
            .filter(KnowledgeDocument.created_at >= midnight_utc)
            .label("today_imports"),
        ).where(KnowledgeDocument.is_active == True)  # noqa: E712 - SQL expression

        row = (await db.execute(stats_query)).one()
        total = row.total or 0

        took_ms = (time.perf_counter() - started) * 1000
        return {
            "total_documents": total,
            # Fixed placeholder: searches are not counted by this endpoint.
            "total_searches": 0,
            "today_imports": row.today_imports or 0,
            "vector_store_size": f"{total} docs",
            "status": "healthy",
            "took_ms": round(took_ms, 2),
        }
    except Exception as e:
        raise handle_internal_error("Knowledge stats", e)
183+
184+
@router.get("/search-modes")
async def get_search_modes(current_user: User = Depends(get_current_user)):
    """Return the available search modes (compatibility endpoint for the frontend configuration page).

    The list is static; ``default`` names the mode used when the client
    does not pick one.
    """
    # (mode id, display name, description) in the order shown to clients.
    catalogue = (
        ("semantic", "Semantic", "Embedding-based semantic retrieval"),
        ("keyword", "Keyword", "Keyword matching over titles and content"),
        ("hybrid", "Hybrid", "Semantic + keyword blended ranking"),
    )
    modes = [
        {"id": mode_id, "name": label, "description": summary}
        for mode_id, label, summary in catalogue
    ]
    return {"modes": modes, "default": "semantic"}
196+
197+
def _escape_like_pattern(term: str, escape_char: str = "\\") -> str:
    """Escape LIKE wildcards in *term* so it is matched as literal text."""
    return (
        term.replace(escape_char, escape_char * 2)
        .replace("%", escape_char + "%")
        .replace("_", escape_char + "_")
    )


@router.post("/search")
async def search_documents(
    search_request: KnowledgeSearchRequest,
    request: Request,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Search documents (compatible with the frontend `/api/v1/knowledge/search` route).

    Strategy handling:
        * ``semantic``: query the RAG engine; fall back to keyword matching
          when the engine is unavailable, fails, or yields no usable hit.
        * ``hybrid``: merge RAG hits with keyword matches, keeping the
          higher-scoring entry per document.
        * ``keyword`` (and any other value): keyword matching over title
          and content only.

    Args:
        search_request: Query text, result limit, strategy and optional
            source-type filters (top-level ``source_types`` wins over
            ``filters["source_types"]``).
        request: Incoming request; ``app.state.rag_engine`` and
            ``app.state.rag_available`` are read from it, as is the
            ``X-Request-ID`` header.
        db: Database session.
        current_user: Authenticated user; ``username`` is stored with the
            search history entry.

    Returns:
        A dict with ``query``, ``results``, ``total``, ``search_type`` (the
        strategy actually used) and ``total_took_ms``.

    Raises:
        HTTPException: Propagated unchanged when raised inside the handler.
        Whatever ``handle_internal_error`` produces for any other failure.
    """
    import logging
    import time

    logger = logging.getLogger(__name__)

    try:
        started = time.perf_counter()
        query_text = validate_search_query(search_request.query)
        strategy = (search_request.strategy or "semantic").lower()
        limit = search_request.limit

        source_types = search_request.source_types or []
        if not source_types and isinstance(search_request.filters, dict):
            nested_source_types = search_request.filters.get("source_types")
            if isinstance(nested_source_types, list):
                source_types = [str(v) for v in nested_source_types if v]

        base_query = select(KnowledgeDocument).where(
            KnowledgeDocument.is_active == True  # noqa: E712 - SQL expression
        )
        if source_types:
            base_query = base_query.where(
                KnowledgeDocument.source_type.in_(source_types)
            )

        results: List[Dict[str, Any]] = []
        used_strategy = "keyword"

        # 1) Semantic / hybrid path through the RAG engine.
        rag_engine: Optional[RAGEngine] = getattr(request.app.state, "rag_engine", None)
        rag_usable = bool(
            getattr(request.app.state, "rag_available", False)
            and rag_engine is not None
        )

        if strategy in {"semantic", "hybrid"} and rag_usable:
            try:
                rag_hits = await rag_engine.search(
                    query=query_text, top_k=min(100, limit * 2)
                )
                # Keep only hits whose document id parses as an integer.
                # isdecimal() (unlike isdigit()) accepts exactly the digit
                # strings that int() can convert.
                numbered_hits = []
                for hit in rag_hits or []:
                    raw_id = str(hit.document_id)
                    if raw_id.isdecimal():
                        numbered_hits.append((int(raw_id), hit))

                if numbered_hits:
                    db_rows = await db.execute(
                        base_query.where(
                            KnowledgeDocument.id.in_(
                                [doc_id for doc_id, _ in numbered_hits]
                            )
                        )
                    )
                    doc_map = {doc.id: doc for doc in db_rows.scalars().all()}

                    # Several hits may point at the same document; keep the
                    # best-scoring one so a document is listed only once.
                    best_by_id: Dict[str, Dict[str, Any]] = {}
                    for doc_id, hit in numbered_hits:
                        doc = doc_map.get(doc_id)
                        if doc is None:
                            continue
                        item = _document_to_search_result(
                            doc, score=hit.score, search_type="semantic"
                        )
                        previous = best_by_id.get(item["id"])
                        if previous is None or item["score"] > previous["score"]:
                            best_by_id[item["id"]] = item
                    results = list(best_by_id.values())
                    if results:
                        used_strategy = "semantic"
            except Exception:
                # Deliberate best-effort: degrade to keyword search below,
                # but leave a trace instead of failing silently.
                logger.warning(
                    "Semantic search failed; falling back to keyword search",
                    exc_info=True,
                )
                results = []

        # 2) Keyword path: requested explicitly, needed for the hybrid
        #    merge, or used as the fallback when nothing was found above.
        if not results or strategy in {"keyword", "hybrid"}:
            # Escape %, _ and \ so user input is matched literally.
            like_pattern = f"%{_escape_like_pattern(query_text)}%"
            keyword_query = (
                base_query.where(
                    or_(
                        KnowledgeDocument.title.ilike(like_pattern, escape="\\"),
                        KnowledgeDocument.content.ilike(like_pattern, escape="\\"),
                    )
                )
                .order_by(KnowledgeDocument.updated_at.desc())
                .limit(limit * 3)
            )
            kw_docs = (await db.execute(keyword_query)).scalars().all()

            needle = query_text.lower()
            kw_items = [
                _document_to_search_result(
                    doc,
                    # A title match outranks a content-only match.
                    score=0.9 if needle in (doc.title or "").lower() else 0.75,
                    search_type="keyword",
                )
                for doc in kw_docs
            ]

            if strategy == "hybrid" and results:
                # Merge semantic + keyword hits, keeping the higher score
                # per document (semantic wins ties).
                merged: Dict[str, Dict[str, Any]] = {
                    item["id"]: item for item in results
                }
                for item in kw_items:
                    existing = merged.get(item["id"])
                    if existing is None or item["score"] > existing["score"]:
                        merged[item["id"]] = item
                results = list(merged.values())
                used_strategy = "hybrid"
            else:
                results = kw_items
                used_strategy = "keyword"

        # Sort and cap.
        results.sort(key=lambda entry: entry.get("score", 0), reverse=True)
        results = results[:limit]
        matched_count = len(results)

        # Save history for analytics. This is best-effort: a failure here
        # must not turn a successful search into an error. The explicit
        # commit mirrors the other write handlers in this module.
        try:
            db.add(
                SearchHistory(
                    query=query_text,
                    results_count=matched_count,
                    user_id=str(getattr(current_user, "username", "unknown")),
                    session_id=request.headers.get("X-Request-ID"),
                )
            )
            await db.commit()
        except Exception:
            await db.rollback()
            logger.warning("Failed to persist search history", exc_info=True)

        took_ms = (time.perf_counter() - started) * 1000
        return {
            "query": query_text,
            "results": results,
            "total": matched_count,
            "search_type": used_strategy,
            "total_took_ms": round(took_ms, 2),
        }
    except HTTPException:
        # Keep deliberate HTTP errors intact, as the other handlers in
        # this module do.
        raise
    except Exception as e:
        raise handle_internal_error("Knowledge search", e)
310+
311+
91312@router .get ("/documents/{document_id}" , response_model = DocumentResponse )
92313async def get_document (
93314 document_id : int ,
@@ -109,7 +330,7 @@ async def get_document(
109330 detail = f"Document with id { document_id } not found"
110331 )
111332
112- return DocumentResponse . from_orm (document )
333+ return _to_document_response (document )
113334
114335 except HTTPException :
115336 raise
@@ -137,7 +358,6 @@ async def get_related_documents(
137358 detail = f"Document with id { document_id } not found"
138359 )
139360
140- from sqlalchemy import or_
141361 related_query = (
142362 select (KnowledgeDocument )
143363 .where (KnowledgeDocument .id != document_id )
@@ -154,7 +374,7 @@ async def get_related_documents(
154374 related_result = await db .execute (related_query )
155375 related_docs = related_result .scalars ().all ()
156376
157- return [DocumentResponse . from_orm (doc ) for doc in related_docs ]
377+ return [_to_document_response (doc ) for doc in related_docs ]
158378
159379 except HTTPException :
160380 raise
@@ -211,7 +431,7 @@ async def create_document(
211431 raise handle_internal_error ("RAG indexing" , rag_err )
212432
213433 await db .commit ()
214- return DocumentResponse . from_orm (document )
434+ return _to_document_response (document )
215435
216436 except HTTPException :
217437 raise
@@ -246,7 +466,10 @@ async def update_document(
246466 update_data = document_data .dict (exclude_unset = True )
247467 for field , value in update_data .items ():
248468 if field in ALLOWED_UPDATE_FIELDS :
249- setattr (document , field , value )
469+ if field == "metadata" :
470+ document .document_metadata = value
471+ else :
472+ setattr (document , field , value )
250473
251474 document .updated_at = datetime .utcnow ()
252475
@@ -267,7 +490,7 @@ async def update_document(
267490
268491 await db .commit ()
269492 await db .refresh (document )
270- return DocumentResponse . from_orm (document )
493+ return _to_document_response (document )
271494
272495 except HTTPException :
273496 raise
@@ -391,7 +614,7 @@ async def upload_document(
391614
392615 await db .commit ()
393616 await db .refresh (document )
394- return DocumentResponse . from_orm (document )
617+ return _to_document_response (document )
395618
396619 except HTTPException :
397620 raise
0 commit comments