shivama205
diff --git a/‎.env.example‎
Lines changed: 10 additions & 0 deletions b/‎.env.example‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 65 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 13 additions & 1 deletion b/‎Makefile‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎app/api/endpoints/knowledge_bases.py‎
Lines changed: 1 addition & 1 deletion b/‎app/api/endpoints/knowledge_bases.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎app/core/config.py‎
Lines changed: 15 additions & 5 deletions b/‎app/core/config.py‎
Lines changed: 15 additions & 5 deletions
diff --git a/‎app/core/middleware.py‎
Lines changed: 62 additions & 1 deletion b/‎app/core/middleware.py‎
Lines changed: 62 additions & 1 deletion
diff --git a/‎app/main.py‎
Lines changed: 18 additions & 3 deletions b/‎app/main.py‎
Lines changed: 18 additions & 3 deletions
diff --git a/‎app/repositories/storage_repository.py‎
Lines changed: 2 additions & 1 deletion b/‎app/repositories/storage_repository.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎app/services/rag/chunker/chunker_factory.py‎
Lines changed: 2 additions & 1 deletion b/‎app/services/rag/chunker/chunker_factory.py‎
Lines changed: 2 additions & 1 deletion
@@ -34,6 +34,16 @@ MYSQL_USER=docbrain
 MYSQL_PASSWORD=password
 MYSQL_DATABASE=docbrain
 
+# CORS (comma-separated origins)
+CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
+
+# Rate Limiting
+RATE_LIMIT_PER_MINUTE=60
+
+# Email (SendGrid)
+SENDGRID_API_KEY=your-sendgrid-api-key
+FROM_EMAIL=noreply@yourdomain.com
+
 # File Upload
 MAX_FILE_SIZE_MB=10
 UPLOAD_DIR=/data/uploads 
@@ -0,0 +1,65 @@
+# Continuous integration workflow: runs a lint job and a test job for pushes
+# and pull requests that target the main, dev, or develop branches.
+name: CI
+
+on:
+  push:
+    branches: [main, dev, develop]
+  pull_request:
+    branches: [main, dev, develop]
+
+jobs:
+  # Style checks only: installs just the linters, not the application's
+  # dependencies.
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # NOTE(review): setup-python's pip cache is keyed on a dependency file;
+      # confirm one it recognises exists in the repo or set
+      # cache-dependency-path explicitly.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install linting tools
+        run: pip install flake8 black isort
+
+      # --check/--diff report formatting differences without rewriting files.
+      - name: Check formatting with black
+        run: black --check --diff app/ tests/
+
+      - name: Check import ordering with isort
+        run: isort --check-only --diff app/ tests/
+
+      # NOTE(review): E501 (line too long) is ignored here, so
+      # --max-line-length 120 appears to have no effect; --ignore also
+      # replaces flake8's default ignore list rather than extending it.
+      # Confirm both are intended.
+      - name: Lint with flake8
+        run: flake8 app/ tests/ --max-line-length 120 --ignore E501,W503,E402
+
+  # Runs the pytest suite under tests/ with coverage measured for the app
+  # package.
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install test dependencies
+        run: pip install -r requirements-test.txt
+
+      # The env block supplies dummy values only; no real credentials are
+      # used in CI.
+      - name: Run tests with coverage
+        run: pytest tests/ -v --cov=app --cov-report=term-missing --cov-report=xml
+        env:
+          ENVIRONMENT: test
+          SECRET_KEY: ci-test-secret-key
+          SENDGRID_API_KEY: test
+          FROM_EMAIL: test@example.com
+          PINECONE_API_KEY: test
+          PINECONE_ENVIRONMENT: test
+          WHITELISTED_EMAILS: test@example.com
+
+      # `if: always()` runs this step even when an earlier step (for example
+      # the tests) failed.
+      - name: Upload coverage report
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: coverage-report
+          path: coverage.xml
@@ -22,7 +22,19 @@ worker:
 	sh ./restart_worker.sh
 
 test:
-	pytest
+	pytest tests/ -v
+
+test-cov:
+	pytest tests/ -v --cov=app --cov-report=term-missing
+
+lint:
+	black --check --diff app/ tests/
+	isort --check-only --diff app/ tests/
+	flake8 app/ tests/ --max-line-length 120 --ignore E501,W503,E402
+
+format:
+	black app/ tests/
+	isort app/ tests/
 
 clean:
 	find . -type d -name "__pycache__" -exec rm -r {} +
 
@@ -2,7 +2,8 @@
 
 # DocBrain - Self-Hosted RAG Framework
 
-![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
+[![CI](https://github.com/shivama205/DocBrain/actions/workflows/ci.yml/badge.svg?branch=dev)](https://github.com/shivama205/DocBrain/actions/workflows/ci.yml)
+![Python Version](https://img.shields.io/badge/python-3.11%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
 ![Security](https://img.shields.io/badge/security-self--hosted-brightgreen)
 
 
@@ -202,7 +202,7 @@ async def get_shared_users(
 @router.post("/{kb_id}/documents", response_model=DocumentResponse)
 async def create_document(
     kb_id: str = Path(..., description="Knowledge base ID"),
-    file: UploadFile = Annotated[..., File(..., description="Document to upload")],
+    file: UploadFile = File(..., description="Document to upload"),
     current_user: UserResponse = Depends(get_current_user),
     doc_service: DocumentService = Depends(get_document_service)
 ):
 
@@ -12,15 +12,15 @@ class Settings(BaseSettings):
     # Security
     SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key")
     ALGORITHM: str = "HS256"
-    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8  # 8 days
+    ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))  # 24 hours
 
     # Email Settings
-    SENDGRID_API_KEY: str
-    FROM_EMAIL: EmailStr
+    SENDGRID_API_KEY: str = os.getenv("SENDGRID_API_KEY", "")
+    FROM_EMAIL: str = os.getenv("FROM_EMAIL", "noreply@example.com")
 
     # Vector Store
     PINECONE_API_KEY: str = os.getenv("PINECONE_API_KEY", "")
-    PINECONE_ENVIRONMENT: str
+    PINECONE_ENVIRONMENT: str = os.getenv("PINECONE_ENVIRONMENT", "")
     PINECONE_INDEX_NAME: str = os.getenv("PINECONE_INDEX_NAME", "docbrain")
     PINECONE_SUMMARY_INDEX_NAME: str = os.getenv("PINECONE_SUMMARY_INDEX_NAME", "summary")
     PINECONE_QUESTIONS_INDEX_NAME: str = os.getenv("PINECONE_QUESTIONS_INDEX_NAME", "questions")
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
     REDIS_URL: str = "redis://localhost:6379/0"
 
     # Test Emails
-    WHITELISTED_EMAILS: str
+    WHITELISTED_EMAILS: str = os.getenv("WHITELISTED_EMAILS", "")
 
     # RAG
     RAG_TOP_K: int = 3
@@ -44,6 +44,9 @@ class Settings(BaseSettings):
     def WHITELISTED_EMAIL_LIST(self) -> List[str]:
-        return [email.strip() for email in self.WHITELISTED_EMAILS.split(",")]
+        """Return the e-mail addresses listed in ``WHITELISTED_EMAILS``.
+
+        The setting is a comma-separated string. Each entry is stripped of
+        surrounding whitespace and blank entries are dropped, so an empty
+        setting yields ``[]`` rather than ``[""]``.
+        """
+        stripped = (email.strip() for email in self.WHITELISTED_EMAILS.split(","))
+        return [email for email in stripped if email]
 
+    # Rate Limiting
+    RATE_LIMIT_PER_MINUTE: int = int(os.getenv("RATE_LIMIT_PER_MINUTE", "60"))
+
     # File Upload
     MAX_FILE_SIZE_MB: int = 10
     UPLOAD_DIR: str = "/data/uploads"
@@ -65,6 +68,13 @@ def DATABASE_URL(self) -> str:
     CELERY_RESULT_BACKEND: str = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
 
     # CORS
+    CORS_ORIGINS: str = os.getenv("CORS_ORIGINS", "http://localhost:5173,http://127.0.0.1:5173")
+
+    @property
+    def CORS_ORIGIN_LIST(self) -> List[str]:
+        """Return the allowed CORS origins listed in ``CORS_ORIGINS``.
+
+        The raw setting is split on commas; each piece is stripped of
+        surrounding whitespace and blank pieces are discarded.
+        """
+        candidates = (piece.strip() for piece in self.CORS_ORIGINS.split(","))
+        return [origin for origin in candidates if origin]
+
     BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = []
 
     # Storage
 
@@ -1,10 +1,17 @@
+import time
+import logging
+from collections import defaultdict
+
 from fastapi import Request, HTTPException, status
 from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import JSONResponse
 from typing import Dict, List, Callable, Optional
 
 from app.core.permissions import Permission, get_permissions_for_role
 from app.db.models.user import UserRole
 
+logger = logging.getLogger(__name__)
+
 
 class PermissionsMiddleware(BaseHTTPMiddleware):
     """
@@ -144,4 +151,58 @@ async def dispatch(self, request: Request, call_next: Callable):
         "PUT": [Permission.MANAGE_SYSTEM],
         "DELETE": [Permission.MANAGE_SYSTEM],
     },
-} 
+}
+
+
+class RateLimitMiddleware(BaseHTTPMiddleware):
+    """
+    Simple in-memory rate limiting middleware.
+
+    Limits requests per client IP using a sliding window approach: a client
+    may make at most ``requests_per_minute`` requests within any trailing
+    60-second window. Requests over the limit are answered with HTTP 429
+    and are not recorded. Requests whose path starts with one of
+    ``exempt_paths`` bypass the limiter entirely.
+
+    State is held in this process only. For production deployments with
+    multiple workers, consider using a Redis-backed solution instead.
+    """
+
+    # Length of the sliding window, in seconds.
+    _WINDOW_SECONDS = 60.0
+
+    def __init__(
+        self,
+        app,
+        requests_per_minute: int = 60,
+        exempt_paths: Optional[List[str]] = None,
+    ):
+        """
+        Args:
+            app: The ASGI application being wrapped.
+            requests_per_minute: Maximum requests allowed per client within
+                the sliding window.
+            exempt_paths: Path prefixes that are never rate limited. When
+                omitted (or empty), the health check and API documentation
+                routes are exempt.
+        """
+        super().__init__(app)
+        self.requests_per_minute = requests_per_minute
+        self.exempt_paths = exempt_paths or ["/health", "/docs", "/openapi.json", "/redoc"]
+        # {client_ip: [timestamp, ...]}
+        self._requests: Dict[str, List[float]] = defaultdict(list)
+        # Timestamp of the last sweep that evicted idle clients.
+        self._last_sweep: float = 0.0
+
+    def _get_client_ip(self, request: Request) -> str:
+        """Return the key used to identify the client of ``request``.
+
+        Uses the first entry of ``X-Forwarded-For`` when that header is
+        present, otherwise the socket peer address, otherwise ``"unknown"``.
+        """
+        # NOTE(review): X-Forwarded-For is supplied by the client unless a
+        # trusted proxy overwrites it, so it can be spoofed to dodge the
+        # limit. Confirm the app is only reachable through such a proxy.
+        forwarded = request.headers.get("x-forwarded-for")
+        if forwarded:
+            return forwarded.split(",")[0].strip()
+        return request.client.host if request.client else "unknown"
+
+    def _cleanup(self, timestamps: List[float], now: float) -> List[float]:
+        """Return the timestamps that are still inside the window ending at ``now``."""
+        cutoff = now - self._WINDOW_SECONDS
+        return [t for t in timestamps if t > cutoff]
+
+    def _evict_idle_clients(self, now: float) -> None:
+        """Drop clients with no request inside the current window.
+
+        Without this, every distinct client IP ever seen would keep an entry
+        forever. The sweep runs at most once per window so its cost stays
+        negligible per request.
+        """
+        if now - self._last_sweep < self._WINDOW_SECONDS:
+            return
+        self._last_sweep = now
+        # Iterate over a snapshot of the keys because entries are deleted.
+        for client_ip in list(self._requests):
+            recent = self._cleanup(self._requests[client_ip], now)
+            if recent:
+                self._requests[client_ip] = recent
+            else:
+                del self._requests[client_ip]
+
+    async def dispatch(self, request: Request, call_next: Callable):
+        """Forward the request, or answer 429 if the client is over its limit."""
+        if any(request.url.path.startswith(p) for p in self.exempt_paths):
+            return await call_next(request)
+
+        client_ip = self._get_client_ip(request)
+        now = time.time()
+        self._evict_idle_clients(now)
+
+        # Discard this client's expired entries before counting. `.get` is
+        # used so that merely looking a client up does not create an entry.
+        recent = self._cleanup(self._requests.get(client_ip, []), now)
+
+        if len(recent) >= self.requests_per_minute:
+            self._requests[client_ip] = recent
+            logger.warning("Rate limit exceeded for %s", client_ip)
+            return JSONResponse(
+                status_code=429,
+                content={"detail": "Too many requests. Please try again later."},
+                # Upper bound: after one full window every entry has expired.
+                headers={"Retry-After": "60"},
+            )
+
+        # Only requests that are let through count against the limit.
+        recent.append(now)
+        self._requests[client_ip] = recent
+        return await call_next(request)
@@ -3,7 +3,7 @@
 
 from app.core.config import settings
 from app.api.endpoints import auth, knowledge_bases, conversations, messages, users
-from app.core.middleware import PermissionsMiddleware, DEFAULT_PATH_PERMISSIONS
+from app.core.middleware import PermissionsMiddleware, RateLimitMiddleware, DEFAULT_PATH_PERMISSIONS
 
 app = FastAPI(
     title=settings.APP_NAME,
@@ -13,12 +13,18 @@
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173", "*"],  # Explicitly allow frontend origin
+    allow_origins=settings.CORS_ORIGIN_LIST,
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
+# Add rate limiting middleware
+app.add_middleware(
+    RateLimitMiddleware,
+    requests_per_minute=settings.RATE_LIMIT_PER_MINUTE,
+)
+
 # Add Permissions middleware
 app.add_middleware(
     PermissionsMiddleware,
@@ -34,4 +40,13 @@
 
 @app.get("/")
 async def root():
-    return {"message": "Welcome to DocBrain API"} 
+    return {"message": "Welcome to DocBrain API"}
+
+
+@app.get("/health")
+async def health():
+    """Health check endpoint for monitoring and orchestration.
+
+    Returns:
+        A dict with a fixed ``"healthy"`` status, the configured application
+        name, and the application version.
+    """
+    return {
+        "status": "healthy",
+        "service": settings.APP_NAME,
+        "version": app.version,
+    }
@@ -22,7 +22,8 @@ async def insert_csv(db: Session, table_name: str, create_table_query: str, colu
 
             # insert the data one by one 
             for row in data:
-                INSERT_ROW_QUERY = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({', '.join([f"'{str(cell)}'" for cell in row])})"
+                values = ', '.join(["'{}'".format(str(cell)) for cell in row])
+                INSERT_ROW_QUERY = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({values})"
                 logger.info(f"Insert Row Query: {INSERT_ROW_QUERY}")
                 db.execute(text(INSERT_ROW_QUERY))
             db.commit()
 
@@ -23,7 +23,8 @@ def create_chunker(document_type: DocumentType) -> Chunker:
             Chunker instance
         """
         try:
-            # TODO: Implement chunker factory based on document type
+            # MultiLevelChunker works well across all document types.
+            # Extend here with type-specific chunkers if needed (e.g., CSV row-based).
             return MultiLevelChunker()
         except Exception as e:
             logger.error(f"Failed to create chunker: {e}", exc_info=True)