Skip to content

Commit aae36c4

Browse files
content search API optimize (open-edge-platform#2338)
1 parent 69d9947 commit aae36c4

54 files changed

Lines changed: 872 additions & 414 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

education-ai-suite/smart-classroom/content_search/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,56 @@
11
# Content Search
22
## Prerequisites
3+
1 python3
4+
python3.10
5+
6+
2 postgreSQL
7+
For PostgreSQL installation, see [PostgreSQL installation](./docs/dev_guide/Installation.md#postgresql)
8+
3 Minio
9+
For Minio installation, see [Minio installation](./docs/dev_guide/Installation.md#minio)
10+
11+
4 System Tools: Required for multimodal processing:
12+
13+
- Tesseract OCR: For image/PDF text extraction.
14+
- Poppler: For PDF rendering.
15+
316
## Environment Setup
17+
### Create/activate python venv
18+
```powershell
19+
# Create venv
20+
& '<your python dir>' -m venv venv
21+
.\venv\Scripts\Activate.ps1
22+
```
23+
24+
### Install dependencies
25+
```powershell
26+
cd xxx/content_search
27+
python -m pip install --upgrade pip
28+
pip install -r .\requirements.txt
29+
```
430
## Launch
31+
```powershell
32+
cd xxx/content_search
33+
python .\start_services.py
34+
```
535
// todo
36+
## Available Endpoints
37+
38+
| Endpoint | Method | Pattern | Description | Status |
39+
| :--- | :---: | :---: | :--- | :---: |
40+
| `/api/v1/system/health` | **GET** | SYNC | Backend app health check | DONE |
41+
| `/api/v1/task/query/{task_id}` | **GET** | SYNC | Query status of a specific task | DONE |
42+
| `/api/v1/task/list` | **GET** | SYNC | Query tasks by conditions (e.g., `?status=PROCESSING`) | DONE |
43+
| `/api/v1/task/cancel/{task_id}` | **POST** | SYNC | Cancel a running task | WIP |
44+
| `/api/v1/task/pause/{task_id}` | **POST** | SYNC | Pause a running task | WIP |
45+
| `/api/v1/task/resume/{task_id}` | **POST** | SYNC | Resume a paused task | WIP |
46+
| `/api/v1/object/files` | **GET** | SYNC | Query files in MinIO with filters | DONE |
47+
| `/api/v1/object/upload` | **POST** | ASYNC | Upload a file to MinIO | DONE |
48+
| `/api/v1/object/ingest` | **POST** | ASYNC | Ingest a specific file from MinIO | WIP |
49+
| `/api/v1/object/ingest-text` | **POST** | ASYNC | Embed a raw text | WIP |
50+
| `/api/v1/object/upload-ingest` | **POST** | ASYNC | Upload to MinIO and trigger ingestion | DONE |
51+
| `/api/v1/object/search` | **POST** | ASYNC | Search for files based on description | DONE |
52+
| `/api/v1/object/download` | **POST** | STREAM | Download file from MinIO | DONE |
53+
| `/api/v1/video/summarization` | **POST** | STREAM | Generate video summarization | WIP |
654

755
## API reference
856
[Content Search API reference](./docs/dev_guide/Content_search_API.md)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1+
#
12
# Copyright (C) 2026 Intel Corporation
23
# SPDX-License-Identifier: Apache-2.0
4+
#
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
16
# api/v1/api.py
27
from fastapi import APIRouter
3-
from api.v1.endpoints import system, object, task
8+
from api.v1.endpoints import system, object, task, vecdatabase
49

510
api_router = APIRouter()
611

712
api_router.include_router(system.router, prefix="/system", tags=["System"])
813
api_router.include_router(object.router, prefix="/object", tags=["EDU-AI Process"])
914
api_router.include_router(task.router, prefix="/task", tags=["EDU-AI Task"])
10-
15+
api_router.include_router(vecdatabase.router, prefix="/vecdb", tags=["Chroma Database"])
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+

education-ai-suite/smart-classroom/content_search/api/v1/endpoints/object.py

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
16
from fastapi import APIRouter, Depends, HTTPException, File, UploadFile, BackgroundTasks
27
from fastapi.responses import StreamingResponse
38
from sqlalchemy.orm import Session
4-
from database import get_db
5-
from services.task_service import task_service
6-
from services.storage_service import storage_service
7-
from services.search_service import search_service
9+
from utils.database import get_db
10+
from utils.task_service import task_service
11+
from utils.storage_service import storage_service
12+
from utils.search_service import search_service
813
import urllib.parse
914
import mimetypes
10-
from core.responses import resp_200
15+
from utils.core_responses import resp_200
1116

1217
router = APIRouter()
1318

@@ -26,7 +31,55 @@ async def upload_video(background_tasks: BackgroundTasks, file: UploadFile = Fil
2631
message="File received, processing started."
2732
)
2833

29-
# @router.post("/ingest")
34+
@router.post("/ingest")
async def ingest_existing_file(
    payload: dict,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Trigger ingestion of a file that already exists in MinIO.

    The JSON payload must contain ``file_key``; ``bucket_name`` is optional
    and defaults to ``content-search``. Delegates the actual work to
    ``task_service.handle_file_ingest`` and returns the created task's id.
    """
    # Reject requests that do not identify a file to ingest.
    file_key = payload.get("file_key")
    if not file_key:
        raise HTTPException(status_code=400, detail="file_key is required")

    bucket_name = payload.get("bucket_name", "content-search")

    result = await task_service.handle_file_ingest(
        db,
        {"file_key": file_key, "bucket_name": bucket_name},
        background_tasks,
    )

    return resp_200(
        data={
            "task_id": str(result["task_id"]),
            "status": result["status"],
            "file_key": file_key,
        },
        message="Ingestion process started for existing file"
    )
59+
60+
@router.post("/ingest-text")
async def ingest_raw_text(
    payload: dict,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Start an asynchronous task that embeds raw text from the payload.

    The payload must contain a non-empty ``text`` field; the whole payload
    is forwarded to ``task_service.handle_text_ingest``.
    """
    if not payload.get("text"):
        raise HTTPException(status_code=400, detail="Text content is required")

    result = await task_service.handle_text_ingest(db, payload, background_tasks)

    return resp_200(
        data={
            "task_id": str(result["task_id"]),
            "status": result["status"],
        },
        message="Text ingestion started"
    )
3083

3184
@router.post("/upload-ingest")
3285
async def upload_file_with_ingest(

education-ai-suite/smart-classroom/content_search/api/v1/endpoints/system.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
16
from fastapi import APIRouter, Depends
27
from sqlalchemy.orm import Session
38
from sqlalchemy import text
4-
from database import get_db
5-
from services.storage_service import storage_service
9+
from utils.database import get_db
10+
from utils.storage_service import storage_service
611
import time
712

813
router = APIRouter()
@@ -39,4 +44,4 @@ async def health_check(db: Session = Depends(get_db)):
3944
status["services"]["minio"] = f"offline: {err}"
4045
status["status"] = "unhealthy"
4146

42-
return status
47+
return status

education-ai-suite/smart-classroom/content_search/api/v1/endpoints/task.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1+
#
2+
# Copyright (C) 2026 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
16
from fastapi import APIRouter, Depends, HTTPException, Query
27
from sqlalchemy.orm import Session
38
from typing import Optional, List
4-
from database import get_db
5-
from crud.task_crud import task_crud
9+
from utils.database import get_db
10+
from utils.crud_task import task_crud
611
from uuid import UUID
7-
from core.models import AITask
8-
from core.responses import resp_200
12+
from utils.core_models import AITask
13+
from utils.core_responses import resp_200
914

1015
router = APIRouter()
1116
@router.get("/list")
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import logging
2+
from fastapi import APIRouter, HTTPException
3+
from typing import Optional, List, Dict, Any
4+
5+
from providers.chromadb_wrapper.chroma_client import ChromaClientWrapper
6+
7+
logger = logging.getLogger(__name__)
8+
9+
# Initialize the ChromaDB Wrapper
10+
chroma_db = ChromaClientWrapper()
11+
12+
router = APIRouter()
13+
14+
# --- Data Query Endpoints ---
15+
16+
@router.get("/list-ids")
async def list_ids(collection_name: str):
    """Return every document ID stored in *collection_name*.

    An empty collection yields ``{"ids": [], "count": 0}`` instead of a 404
    so that client-side pagination loops terminate cleanly.
    """
    records = chroma_db.query_all(collection_name=collection_name)
    if not records:
        return {"ids": [], "count": 0}

    id_list = [record['id'] for record in records]
    return {"ids": id_list, "count": len(id_list)}
28+
29+
@router.post("/get-by-ids")
async def get_by_ids(collection_name: str, ids: List[str], include_vector: bool = False):
    """Fetch specific records by ID, returning metadata and, when
    *include_vector* is true, the stored embeddings as well.
    """
    # Metadata is always returned; embeddings only on explicit request.
    fields = ['meta'] + (['vector'] if include_vector else [])

    try:
        records = chroma_db.get(ids=ids, output_fields=fields, collection_name=collection_name)
    except Exception as e:
        logger.error(f"Error fetching IDs {ids}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error during data retrieval")

    return {"results": records}
44+
45+
@router.post("/search")
async def search_vectors(
    collection_name: str,
    query_embeddings: List[List[float]],
    n_results: int = 5,
    where: Optional[Dict[str, Any]] = None
):
    """Run a vector similarity query against *collection_name*.

    Backs the 'search by image' / 'search by text' features: callers supply
    pre-computed embeddings plus an optional metadata filter (*where*).
    """
    try:
        hits = chroma_db.query(
            collection_name=collection_name,
            query_embeddings=query_embeddings,
            where=where,
            n_results=n_results,
        )
    except Exception as e:
        logger.error(f"Vector search failed in {collection_name}: {e}")
        raise HTTPException(status_code=500, detail="Vector search execution failed")

    return {"results": hits}
67+
68+
# --- Data Manipulation Endpoints ---
69+
70+
@router.post("/insert")
async def insert_data(collection_name: str, data: List[Dict[str, Any]]):
    """Insert vector data and metadata into *collection_name*.

    Each item is expected as ``{"id": "uuid", "vector": [...], "meta": {...}}``.
    """
    try:
        info = chroma_db.insert(data=data, collection_name=collection_name)
    except Exception as e:
        logger.error(f"Insertion failed: {e}")
        raise HTTPException(status_code=400, detail="Invalid data format or database connection error")

    return {"status": "success", "info": info}
82+
83+
@router.delete("/delete")
async def delete_data(collection_name: str, ids: List[str]):
    """Remove the records with the given IDs from *collection_name*."""
    try:
        info = chroma_db.delete(ids=ids, collection_name=collection_name)
    except Exception as e:
        logger.error(f"Deletion failed: {e}")
        raise HTTPException(status_code=400, detail="Failed to delete specified IDs")

    return {"status": "success", "info": info}
94+
95+
# --- Collection Management Endpoints ---
96+
97+
@router.get("/collections")
async def list_collections():
    """List the names of all collections in the ChromaDB instance."""
    try:
        names = [collection.name for collection in chroma_db.client.list_collections()]
    except Exception as e:
        logger.error(f"Could not list collections: {e}")
        raise HTTPException(status_code=500, detail="Database connection error")

    return {"collections": names}
108+
109+
@router.get("/count")
async def get_collection_count(collection_name: str):
    """Return the total number of items stored in *collection_name*."""
    collection = chroma_db.load_collection(collection_name)
    # NOTE(review): assumes load_collection returns a falsy value for a
    # missing collection — confirm against the wrapper implementation.
    if not collection:
        raise HTTPException(status_code=404, detail=f"Collection '{collection_name}' not found")
    return {"collection": collection_name, "count": collection.count()}
118+
119+
@router.delete("/drop-collection")
async def drop_collection(collection_name: str):
    """
    Completely delete a collection and all its data. Use with caution.

    Raises HTTP 400 carrying the underlying error text when deletion fails
    (e.g. the collection does not exist).
    """
    try:
        chroma_db.client.delete_collection(name=collection_name)
        return {"status": "success", "message": f"Collection '{collection_name}' deleted"}
    except Exception as e:
        # Log before surfacing — every other handler in this module logs its
        # failures; this one previously dropped the error from the server log.
        logger.error(f"Failed to drop collection {collection_name}: {e}")
        raise HTTPException(status_code=400, detail=str(e))

0 commit comments

Comments
 (0)