kaneliu120
diff --git a/backend/app/api/v1/analytics.py b/backend/app/api/v1/analytics.py
Lines changed: 3 additions & 3 deletions
diff --git a/backend/app/api/v1/knowledge.py b/backend/app/api/v1/knowledge.py
Lines changed: 237 additions & 14 deletions
@@ -17,7 +17,7 @@
 
 @router.get("/overview")
 async def get_overview(
-    period: str = Query(default="7d", regex="^(7d|30d|90d)$"),
+    period: str = Query(default="7d", pattern="^(7d|30d|90d)$"),
     current_user: User = Depends(get_current_user),
 ) -> Dict[str, Any]:
     start = datetime.utcnow()
@@ -95,10 +95,10 @@ async def get_document_stats(
 
 @router.get("/search-trends")
 async def get_search_trends(
-    period: str = Query(default="7d", regex="^(7d|30d)$"),
+    period: str = Query(default="7d", pattern="^(7d|30d|90d)$"),
     current_user: User = Depends(get_current_user),
 ) -> Dict[str, Any]:
-    period_days_map = {"7d": 7, "30d": 30}
+    period_days_map = {"7d": 7, "30d": 30, "90d": 90}
     days = period_days_map[period]
     since = datetime.utcnow() - timedelta(days=days)
 
 
@@ -1,32 +1,84 @@
 """知识管理API"""
 from fastapi import APIRouter, Depends, HTTPException, Request, status, Query
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import select, func
-from typing import List, Optional
+from sqlalchemy import select, func, or_
+from typing import List, Optional, Dict, Any
 from datetime import datetime
+from pydantic import BaseModel, Field
 
-from app.core.database import get_db, KnowledgeDocument
+from app.core.database import get_db, KnowledgeDocument, SearchHistory
 from app.core.auth import get_current_user
 from app.core.rag_engine import RAGEngine
 from app.utils.validation import (
     validate_search_query, validate_document_content,
-    validate_metadata, InputValidator
+    validate_metadata
 )
 from app.models.knowledge import (
     DocumentCreate, DocumentUpdate, DocumentResponse,
-    DocumentListResponse, DocumentSearchRequest
+    DocumentListResponse
 )
 from app.models.user import User
 from app.utils.error_handlers import handle_internal_error
 
 router = APIRouter()
 
 
+class KnowledgeSearchRequest(BaseModel):
+    """Request body accepted by the knowledge search endpoint."""
+
+    # Required search text, 1 to 1000 characters long.
+    query: str = Field(..., min_length=1, max_length=1000)
+    # Maximum number of results to return, between 1 and 100.
+    limit: int = Field(10, ge=1, le=100)
+    # Name of the requested search strategy.
+    strategy: str = Field("semantic")
+    # Optional list of source types to restrict the search to.
+    source_types: Optional[List[str]] = Field(default=None)
+    # Optional free-form filter mapping.
+    filters: Optional[Dict[str, Any]] = Field(default=None)
+
+
+def _document_to_search_result(doc: KnowledgeDocument, score: float = 0.5, search_type: str = "keyword") -> Dict[str, Any]:
+    """Flatten a document into the dict shape used for search results.
+
+    Args:
+        doc: Document whose fields are copied into the result.
+        score: Relevance score; clamped into the range [0.0, 1.0].
+        search_type: Label of the search path that produced the hit.
+
+    Returns:
+        A dict holding the stringified id, a 500-character content preview,
+        the UTF-8 byte size of the full content, ISO-formatted timestamps
+        (empty strings when unset) and the clamped score.
+    """
+    body = doc.content or ""
+    kind = doc.source_type or "unknown"
+    created_iso = doc.created_at.isoformat() if doc.created_at else ""
+    updated_iso = doc.updated_at.isoformat() if doc.updated_at else ""
+
+    # Clamp in two steps: cap at 1.0 first, then floor at 0.0.
+    capped = min(1.0, score)
+    clamped = float(max(0.0, capped))
+
+    byte_count = len(body.encode("utf-8"))
+
+    return {
+        "id": str(doc.id),
+        "title": doc.title,
+        "content": body[:500],
+        "type": kind,
+        "source": doc.source_path or kind,
+        "size": f"{byte_count} B",
+        "views": 0,
+        "starred": False,
+        "tags": doc.tags or [],
+        "created_at": created_iso,
+        "updated_at": updated_iso,
+        # "updated" duplicates "updated_at" under a second key.
+        "updated": updated_iso,
+        "score": clamped,
+        "search_type": search_type,
+    }
+
+
+def _to_document_response(doc: KnowledgeDocument) -> DocumentResponse:
+    """Build the API response model from an ORM document.
+
+    The ORM attribute ``document_metadata`` is exposed under the response
+    field name ``metadata``; missing metadata and tags become empty containers.
+    """
+    # Fields copied across under the same name on both sides.
+    same_name_fields = (
+        "id",
+        "title",
+        "content",
+        "source_type",
+        "source_path",
+        "chunk_index",
+        "total_chunks",
+        "category",
+        "embedding_id",
+        "created_at",
+        "updated_at",
+        "is_active",
+    )
+    payload = {name: getattr(doc, name) for name in same_name_fields}
+    payload["metadata"] = doc.document_metadata or {}
+    payload["tags"] = doc.tags or []
+    return DocumentResponse(**payload)
+
+
 @router.get("/documents", response_model=DocumentListResponse)
 async def list_documents(
     request: Request,
     skip: int = Query(default=0, ge=0, le=10000),
     limit: int = Query(default=100, ge=1, le=1000),
+    page: Optional[int] = Query(default=None, ge=1),
+    sort: str = Query(default="updated"),
     source_type: Optional[str] = None,
     is_active: Optional[bool] = True,
     tags: Optional[str] = None,
@@ -36,6 +88,9 @@ async def list_documents(
 ):
     """获取文档列表"""
     try:
+        if page is not None and skip == 0:
+            skip = (page - 1) * limit
+
         query = select(KnowledgeDocument)
 
         if source_type:
@@ -47,7 +102,15 @@ async def list_documents(
         if category:
             query = query.where(KnowledgeDocument.category == category)
 
-        query = query.offset(skip).limit(limit).order_by(KnowledgeDocument.updated_at.desc())
+        sort_key = (sort or "updated").lower()
+        if sort_key == "created":
+            order_by = KnowledgeDocument.created_at.desc()
+        elif sort_key == "title":
+            order_by = KnowledgeDocument.title.asc()
+        else:
+            order_by = KnowledgeDocument.updated_at.desc()
+
+        query = query.offset(skip).limit(limit).order_by(order_by)
 
         result = await db.execute(query)
         documents = result.scalars().all()
@@ -78,7 +141,7 @@ async def list_documents(
             total = len(documents)
 
         return DocumentListResponse(
-            documents=[DocumentResponse.from_orm(doc) for doc in documents],
+            documents=[_to_document_response(doc) for doc in documents],
             total=total,
             skip=skip,
             limit=limit
@@ -88,6 +151,164 @@ async def list_documents(
         raise handle_internal_error("Document listing", e)
 
 
+@router.get("/stats")
+async def get_knowledge_stats(
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """Return knowledge-base statistics (kept for the legacy frontend and test scripts).
+
+    Counts active documents in total and those created since midnight of the
+    current UTC day, and reports how long the lookup took.
+    """
+    try:
+        started_at = datetime.utcnow()
+        midnight = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
+
+        # Both counts come from a single aggregate query over active documents.
+        total_col = func.count().label("total")
+        today_col = func.count().filter(KnowledgeDocument.created_at >= midnight).label("today_imports")
+        stats_query = select(total_col, today_col).where(KnowledgeDocument.is_active == True)
+        counts = (await db.execute(stats_query)).one()
+
+        elapsed_ms = (datetime.utcnow() - started_at).total_seconds() * 1000
+        payload = {
+            "total_documents": counts.total or 0,
+            # Search totals are not computed here; the field is a constant.
+            "total_searches": 0,
+            "today_imports": counts.today_imports or 0,
+            "vector_store_size": f"{counts.total or 0} docs",
+            "status": "healthy",
+            "took_ms": round(elapsed_ms, 2),
+        }
+        return payload
+    except Exception as exc:
+        raise handle_internal_error("Knowledge stats", exc)
+
+
+@router.get("/search-modes")
+async def get_search_modes(current_user: User = Depends(get_current_user)):
+    """Return the available search modes (compatibility endpoint for the frontend settings page)."""
+    # (id, display name, description) for each mode, in display order.
+    catalog = (
+        ("semantic", "Semantic", "Embedding-based semantic retrieval"),
+        ("keyword", "Keyword", "Keyword matching over titles and content"),
+        ("hybrid", "Hybrid", "Semantic + keyword blended ranking"),
+    )
+    modes = [
+        {"id": mode_id, "name": label, "description": blurb}
+        for mode_id, label, blurb in catalog
+    ]
+    return {"modes": modes, "default": "semantic"}
+
+
+@router.post("/search")
+async def search_documents(
+    search_request: KnowledgeSearchRequest,
+    request: Request,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """Search active documents (serves the frontend's `/api/v1/knowledge/search`).
+
+    The "semantic" and "hybrid" strategies query the RAG engine stored on
+    ``request.app.state`` when it is flagged as available; "hybrid" then
+    merges those hits with keyword matches, keeping the higher score per
+    document. Any other strategy, an unavailable or failing RAG engine, or
+    an empty semantic result falls back to a case-insensitive substring
+    match on title and content.
+
+    Args:
+        search_request: Query text, result limit, strategy and optional
+            source-type filters (top-level or under ``filters``).
+        request: Incoming request; supplies ``app.state`` and the
+            ``X-Request-ID`` header recorded with the search history.
+        db: Async database session.
+        current_user: Authenticated user; ``username`` is stored with the
+            search history.
+
+    Returns:
+        Dict with ``query``, ``results``, ``total``, ``search_type`` (the
+        strategy actually used) and ``total_took_ms``.
+
+    Raises:
+        HTTPException: Propagated unchanged when raised inside the handler;
+            every other exception is routed through ``handle_internal_error``.
+    """
+    try:
+        start = datetime.utcnow()
+        query_text = validate_search_query(search_request.query)
+        strategy = (search_request.strategy or "semantic").lower()
+        limit = search_request.limit
+
+        # Top-level source_types win; otherwise accept filters["source_types"].
+        source_types = search_request.source_types or []
+        if not source_types and isinstance(search_request.filters, dict):
+            f_source_types = search_request.filters.get("source_types")
+            if isinstance(f_source_types, list):
+                source_types = [str(v) for v in f_source_types if v]
+
+        base_query = select(KnowledgeDocument).where(KnowledgeDocument.is_active == True)
+        if source_types:
+            base_query = base_query.where(KnowledgeDocument.source_type.in_(source_types))
+
+        results: List[Dict[str, Any]] = []
+        used_strategy = "keyword"
+
+        # 1) Semantic / Hybrid path through RAG
+        rag_engine: Optional[RAGEngine] = getattr(request.app.state, "rag_engine", None)
+        rag_usable = bool(getattr(request.app.state, "rag_available", False) and rag_engine is not None)
+
+        if strategy in {"semantic", "hybrid"} and rag_usable:
+            try:
+                # Over-fetch (2x, capped at 100) because hits may be filtered
+                # out by the active/source-type constraints below.
+                rag_hits = await rag_engine.search(query=query_text, top_k=min(100, limit * 2))
+                if rag_hits:
+                    hit_ids = [int(h.document_id) for h in rag_hits if str(h.document_id).isdigit()]
+                    if hit_ids:
+                        db_rows = await db.execute(
+                            base_query.where(KnowledgeDocument.id.in_(hit_ids))
+                        )
+                        doc_map = {doc.id: doc for doc in db_rows.scalars().all()}
+                        for hit in rag_hits:
+                            if not str(hit.document_id).isdigit():
+                                continue
+                            doc = doc_map.get(int(hit.document_id))
+                            if doc is None:
+                                continue
+                            results.append(_document_to_search_result(doc, score=hit.score, search_type="semantic"))
+                    used_strategy = "semantic"
+            except Exception:
+                # Graceful fallback to keyword search below
+                results = []
+
+        # 2) Keyword path: explicit keyword search, the keyword half of a
+        #    hybrid search, or the fallback when the semantic path found nothing.
+        if not results or strategy in {"keyword", "hybrid"}:
+            # Escape LIKE metacharacters so user-supplied "%" and "_" are
+            # matched literally instead of acting as wildcards.
+            like_term = (
+                query_text.replace("\\", "\\\\")
+                .replace("%", "\\%")
+                .replace("_", "\\_")
+            )
+            like_pattern = f"%{like_term}%"
+            kq = (
+                base_query.where(
+                    or_(
+                        KnowledgeDocument.title.ilike(like_pattern, escape="\\"),
+                        KnowledgeDocument.content.ilike(like_pattern, escape="\\"),
+                    )
+                )
+                .order_by(KnowledgeDocument.updated_at.desc())
+                .limit(limit * 3)
+            )
+            kw_result = await db.execute(kq)
+            needle = query_text.lower()
+            kw_items = [
+                _document_to_search_result(
+                    doc,
+                    # Title matches rank above content-only matches.
+                    score=0.9 if needle in (doc.title or "").lower() else 0.75,
+                    search_type="keyword",
+                )
+                for doc in kw_result.scalars().all()
+            ]
+
+            if strategy == "hybrid" and results:
+                # Merge semantic + keyword by max score
+                index: Dict[str, Dict[str, Any]] = {item["id"]: item for item in results}
+                for item in kw_items:
+                    existing = index.get(item["id"])
+                    if existing is None or item["score"] > existing["score"]:
+                        index[item["id"]] = item
+                results = list(index.values())
+                used_strategy = "hybrid"
+            else:
+                results = kw_items
+                used_strategy = "keyword"
+
+        # Sort and cap
+        results.sort(key=lambda x: x.get("score", 0), reverse=True)
+        results = results[:limit]
+        matched_count = len(results)
+
+        # Save history for analytics; commit explicitly, as the other
+        # write handlers in this module do, so the row is persisted.
+        db.add(
+            SearchHistory(
+                query=query_text,
+                results_count=matched_count,
+                user_id=str(getattr(current_user, "username", "unknown")),
+                session_id=request.headers.get("X-Request-ID"),
+            )
+        )
+        await db.commit()
+
+        took_ms = (datetime.utcnow() - start).total_seconds() * 1000
+        return {
+            "query": query_text,
+            "results": results,
+            "total": matched_count,
+            "search_type": used_strategy,
+            "total_took_ms": round(took_ms, 2),
+        }
+    except HTTPException:
+        # Keep deliberate HTTP errors intact instead of masking them as 500s.
+        raise
+    except Exception as e:
+        raise handle_internal_error("Knowledge search", e)
+
+
 @router.get("/documents/{document_id}", response_model=DocumentResponse)
 async def get_document(
     document_id: int,
@@ -109,7 +330,7 @@ async def get_document(
                 detail=f"Document with id {document_id} not found"
             )
 
-        return DocumentResponse.from_orm(document)
+        return _to_document_response(document)
 
     except HTTPException:
         raise
@@ -137,7 +358,6 @@ async def get_related_documents(
                 detail=f"Document with id {document_id} not found"
             )
 
-        from sqlalchemy import or_
         related_query = (
             select(KnowledgeDocument)
             .where(KnowledgeDocument.id != document_id)
@@ -154,7 +374,7 @@ async def get_related_documents(
         related_result = await db.execute(related_query)
         related_docs = related_result.scalars().all()
 
-        return [DocumentResponse.from_orm(doc) for doc in related_docs]
+        return [_to_document_response(doc) for doc in related_docs]
 
     except HTTPException:
         raise
@@ -211,7 +431,7 @@ async def create_document(
                 raise handle_internal_error("RAG indexing", rag_err)
 
         await db.commit()
-        return DocumentResponse.from_orm(document)
+        return _to_document_response(document)
 
     except HTTPException:
         raise
@@ -246,7 +466,10 @@ async def update_document(
         update_data = document_data.dict(exclude_unset=True)
         for field, value in update_data.items():
             if field in ALLOWED_UPDATE_FIELDS:
-                setattr(document, field, value)
+                if field == "metadata":
+                    document.document_metadata = value
+                else:
+                    setattr(document, field, value)
 
         document.updated_at = datetime.utcnow()
 
@@ -267,7 +490,7 @@ async def update_document(
 
         await db.commit()
         await db.refresh(document)
-        return DocumentResponse.from_orm(document)
+        return _to_document_response(document)
 
     except HTTPException:
         raise
@@ -391,7 +614,7 @@ async def upload_document(
 
         await db.commit()
         await db.refresh(document)
-        return DocumentResponse.from_orm(document)
+        return _to_document_response(document)
 
     except HTTPException:
         raise