Skip to content

Commit 4e73ada

Browse files
authored
Merge pull request #1 from shivama205/claude/polish-project-WVv5o
Claude/polish project w vv5o
2 parents 0b9e640 + b545d67 commit 4e73ada

22 files changed

+843
-15
lines changed

.env.example

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ MYSQL_USER=docbrain
3434
MYSQL_PASSWORD=password
3535
MYSQL_DATABASE=docbrain
3636

37+
# CORS (comma-separated origins)
38+
CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
39+
40+
# Rate Limiting
41+
RATE_LIMIT_PER_MINUTE=60
42+
43+
# Email (SendGrid)
44+
SENDGRID_API_KEY=your-sendgrid-api-key
45+
FROM_EMAIL=noreply@yourdomain.com
46+
3747
# File Upload
3848
MAX_FILE_SIZE_MB=10
3949
UPLOAD_DIR=/data/uploads

.github/workflows/ci.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: [main, dev, develop]
6+
pull_request:
7+
branches: [main, dev, develop]
8+
9+
jobs:
10+
lint:
11+
name: Lint
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: actions/checkout@v4
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: "3.11"
20+
cache: pip
21+
22+
- name: Install linting tools
23+
run: pip install flake8 black isort
24+
25+
- name: Check formatting with black
26+
run: black --check --diff app/ tests/
27+
28+
- name: Check import ordering with isort
29+
run: isort --check-only --diff app/ tests/
30+
31+
- name: Lint with flake8
32+
run: flake8 app/ tests/ --max-line-length 120 --ignore E501,W503,E402
33+
34+
test:
35+
name: Test
36+
runs-on: ubuntu-latest
37+
steps:
38+
- uses: actions/checkout@v4
39+
40+
- name: Set up Python
41+
uses: actions/setup-python@v5
42+
with:
43+
python-version: "3.11"
44+
cache: pip
45+
46+
- name: Install test dependencies
47+
run: pip install -r requirements-test.txt
48+
49+
- name: Run tests with coverage
50+
run: pytest tests/ -v --cov=app --cov-report=term-missing --cov-report=xml
51+
env:
52+
ENVIRONMENT: test
53+
SECRET_KEY: ci-test-secret-key
54+
SENDGRID_API_KEY: test
55+
FROM_EMAIL: test@example.com
56+
PINECONE_API_KEY: test
57+
PINECONE_ENVIRONMENT: test
58+
WHITELISTED_EMAILS: test@example.com
59+
60+
- name: Upload coverage report
61+
uses: actions/upload-artifact@v4
62+
if: always()
63+
with:
64+
name: coverage-report
65+
path: coverage.xml

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,19 @@ worker:
2222
sh ./restart_worker.sh
2323

2424
test:
25-
pytest
25+
pytest tests/ -v
26+
27+
test-cov:
28+
pytest tests/ -v --cov=app --cov-report=term-missing
29+
30+
lint:
31+
black --check --diff app/ tests/
32+
isort --check-only --diff app/ tests/
33+
flake8 app/ tests/ --max-line-length 120 --ignore E501,W503,E402
34+
35+
format:
36+
black app/ tests/
37+
isort app/ tests/
2638

2739
clean:
2840
find . -type d -name "__pycache__" -exec rm -r {} +

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
# DocBrain - Self-Hosted RAG Framework
44

5-
![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
5+
[![CI](https://github.com/shivama205/DocBrain/actions/workflows/ci.yml/badge.svg?branch=dev)](https://github.com/shivama205/DocBrain/actions/workflows/ci.yml)
6+
![Python Version](https://img.shields.io/badge/python-3.11%2B-blue)
67
![License](https://img.shields.io/badge/license-MIT-green)
78
![Security](https://img.shields.io/badge/security-self--hosted-brightgreen)
89

app/api/endpoints/knowledge_bases.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ async def get_shared_users(
202202
@router.post("/{kb_id}/documents", response_model=DocumentResponse)
203203
async def create_document(
204204
kb_id: str = Path(..., description="Knowledge base ID"),
205-
file: UploadFile = Annotated[..., File(..., description="Document to upload")],
205+
file: UploadFile = File(..., description="Document to upload"),
206206
current_user: UserResponse = Depends(get_current_user),
207207
doc_service: DocumentService = Depends(get_document_service)
208208
):

app/core/config.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ class Settings(BaseSettings):
1212
# Security
1313
SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key")
1414
ALGORITHM: str = "HS256"
15-
ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8 # 8 days
15+
ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440")) # 24 hours
1616

1717
# Email Settings
18-
SENDGRID_API_KEY: str
19-
FROM_EMAIL: EmailStr
18+
SENDGRID_API_KEY: str = os.getenv("SENDGRID_API_KEY", "")
19+
FROM_EMAIL: str = os.getenv("FROM_EMAIL", "noreply@example.com")
2020

2121
# Vector Store
2222
PINECONE_API_KEY: str = os.getenv("PINECONE_API_KEY", "")
23-
PINECONE_ENVIRONMENT: str
23+
PINECONE_ENVIRONMENT: str = os.getenv("PINECONE_ENVIRONMENT", "")
2424
PINECONE_INDEX_NAME: str = os.getenv("PINECONE_INDEX_NAME", "docbrain")
2525
PINECONE_SUMMARY_INDEX_NAME: str = os.getenv("PINECONE_SUMMARY_INDEX_NAME", "summary")
2626
PINECONE_QUESTIONS_INDEX_NAME: str = os.getenv("PINECONE_QUESTIONS_INDEX_NAME", "questions")
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
3333
REDIS_URL: str = "redis://localhost:6379/0"
3434

3535
# Test Emails
36-
WHITELISTED_EMAILS: str
36+
WHITELISTED_EMAILS: str = os.getenv("WHITELISTED_EMAILS", "")
3737

3838
# RAG
3939
RAG_TOP_K: int = 3
@@ -44,6 +44,9 @@ class Settings(BaseSettings):
4444
def WHITELISTED_EMAIL_LIST(self) -> List[str]:
4545
return [email.strip() for email in self.WHITELISTED_EMAILS.split(",")]
4646

47+
# Rate Limiting
48+
RATE_LIMIT_PER_MINUTE: int = int(os.getenv("RATE_LIMIT_PER_MINUTE", "60"))
49+
4750
# File Upload
4851
MAX_FILE_SIZE_MB: int = 10
4952
UPLOAD_DIR: str = "/data/uploads"
@@ -65,6 +68,13 @@ def DATABASE_URL(self) -> str:
6568
CELERY_RESULT_BACKEND: str = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
6669

6770
# CORS
71+
CORS_ORIGINS: str = os.getenv("CORS_ORIGINS", "http://localhost:5173,http://127.0.0.1:5173")
72+
73+
@property
74+
def CORS_ORIGIN_LIST(self) -> List[str]:
75+
"""Parse comma-separated CORS origins."""
76+
return [origin.strip() for origin in self.CORS_ORIGINS.split(",") if origin.strip()]
77+
6878
BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = []
6979

7080
# Storage

app/core/middleware.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
1+
import time
2+
import logging
3+
from collections import defaultdict
4+
15
from fastapi import Request, HTTPException, status
26
from starlette.middleware.base import BaseHTTPMiddleware
7+
from starlette.responses import JSONResponse
38
from typing import Dict, List, Callable, Optional
49

510
from app.core.permissions import Permission, get_permissions_for_role
611
from app.db.models.user import UserRole
712

13+
logger = logging.getLogger(__name__)
14+
815

916
class PermissionsMiddleware(BaseHTTPMiddleware):
1017
"""
@@ -144,4 +151,58 @@ async def dispatch(self, request: Request, call_next: Callable):
144151
"PUT": [Permission.MANAGE_SYSTEM],
145152
"DELETE": [Permission.MANAGE_SYSTEM],
146153
},
147-
}
154+
}
155+
156+
157+
class RateLimitMiddleware(BaseHTTPMiddleware):
    """
    Simple in-memory, per-client rate limiting middleware.

    Each client (keyed by IP address) may make at most ``requests_per_minute``
    requests within any rolling 60-second window. Requests over the limit are
    answered with HTTP 429 and a ``Retry-After`` header instead of being
    forwarded to the application.

    State is held in this process only, so with several workers each worker
    enforces its own independent limit. For production deployments with
    multiple workers, consider using a Redis-backed solution instead.
    """

    # Length of the sliding window, in seconds.
    WINDOW_SECONDS = 60.0

    def __init__(
        self,
        app,
        requests_per_minute: int = 60,
        exempt_paths: Optional[List[str]] = None,
    ):
        """
        Args:
            app: The ASGI application being wrapped.
            requests_per_minute: Maximum number of requests a single client
                may make per window.
            exempt_paths: Path prefixes that bypass rate limiting. ``None``
                selects the default health/docs endpoints; an explicit empty
                list disables all exemptions.
        """
        super().__init__(app)
        self.requests_per_minute = requests_per_minute
        # Compare against None so that a caller passing [] really gets
        # "no exemptions" rather than silently falling back to the defaults.
        if exempt_paths is None:
            exempt_paths = ["/health", "/docs", "/openapi.json", "/redoc"]
        self.exempt_paths = exempt_paths
        # {client_ip: [timestamp, ...]} -- timestamps are appended in
        # non-decreasing order, so the last element is always the newest.
        self._requests: Dict[str, List[float]] = defaultdict(list)
        # Monotonic time of the last sweep that evicted idle clients.
        self._last_sweep = time.monotonic()

    def _get_client_ip(self, request: Request) -> str:
        """Return the key used to identify the client for rate limiting.

        NOTE(security): ``X-Forwarded-For`` is supplied by the client unless a
        trusted reverse proxy overwrites it. When the service is reachable
        directly, a caller can rotate this header to evade the limit.
        """
        forwarded = request.headers.get("x-forwarded-for")
        if forwarded:
            first_hop = forwarded.split(",")[0].strip()
            # A malformed header such as ", 10.0.0.1" has an empty first
            # element; fall through to the socket address in that case.
            if first_hop:
                return first_hop
        return request.client.host if request.client else "unknown"

    def _cleanup(self, timestamps: List[float], now: float) -> List[float]:
        """Return only the timestamps that fall inside the current window."""
        cutoff = now - self.WINDOW_SECONDS
        return [t for t in timestamps if t > cutoff]

    def _purge_idle_clients(self, now: float) -> None:
        """Drop every client whose newest request is outside the window.

        Without this, one dictionary entry would be kept forever for every
        distinct client key ever seen, growing memory without bound.
        """
        cutoff = now - self.WINDOW_SECONDS
        idle = [
            ip
            for ip, stamps in self._requests.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for ip in idle:
            del self._requests[ip]
        self._last_sweep = now

    async def dispatch(self, request: Request, call_next: Callable):
        """Forward the request, or reject it with 429 if the client is over its limit."""
        # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
        if request.url.path.startswith(tuple(self.exempt_paths)):
            return await call_next(request)

        client_ip = self._get_client_ip(request)
        # Monotonic clock: immune to wall-clock adjustments (NTP steps, DST),
        # which would otherwise reset or freeze every client's window.
        now = time.monotonic()

        # Evict idle clients at most once per window so the cost stays amortised.
        if now - self._last_sweep >= self.WINDOW_SECONDS:
            self._purge_idle_clients(now)

        # .get() avoids creating a defaultdict entry just by looking.
        recent = self._cleanup(self._requests.get(client_ip, []), now)

        if len(recent) >= self.requests_per_minute:
            if recent:
                self._requests[client_ip] = recent
                # A slot frees up when the oldest request leaves the window.
                wait = recent[0] + self.WINDOW_SECONDS - now
            else:
                # Only reachable when requests_per_minute <= 0; keep no state.
                self._requests.pop(client_ip, None)
                wait = self.WINDOW_SECONDS
            # Round up to whole seconds so clients never retry too early.
            whole = int(wait)
            retry_after = max(1, whole + (1 if wait > whole else 0))

            logger.warning("Rate limit exceeded for %s", client_ip)
            return JSONResponse(
                status_code=429,
                content={"detail": "Too many requests. Please try again later."},
                headers={"Retry-After": str(retry_after)},
            )

        # Record this request only once it has been admitted.
        recent.append(now)
        self._requests[client_ip] = recent
        return await call_next(request)

app/main.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from app.core.config import settings
55
from app.api.endpoints import auth, knowledge_bases, conversations, messages, users
6-
from app.core.middleware import PermissionsMiddleware, DEFAULT_PATH_PERMISSIONS
6+
from app.core.middleware import PermissionsMiddleware, RateLimitMiddleware, DEFAULT_PATH_PERMISSIONS
77

88
app = FastAPI(
99
title=settings.APP_NAME,
@@ -13,12 +13,18 @@
1313
# Add CORS middleware
1414
app.add_middleware(
1515
CORSMiddleware,
16-
allow_origins=["http://localhost:5173", "http://127.0.0.1:5173", "*"], # Explicitly allow frontend origin
16+
allow_origins=settings.CORS_ORIGIN_LIST,
1717
allow_credentials=True,
1818
allow_methods=["*"],
1919
allow_headers=["*"],
2020
)
2121

22+
# Add rate limiting middleware
23+
app.add_middleware(
24+
RateLimitMiddleware,
25+
requests_per_minute=settings.RATE_LIMIT_PER_MINUTE,
26+
)
27+
2228
# Add Permissions middleware
2329
app.add_middleware(
2430
PermissionsMiddleware,
@@ -34,4 +40,13 @@
3440

3541
@app.get("/")
3642
async def root():
37-
return {"message": "Welcome to DocBrain API"}
43+
return {"message": "Welcome to DocBrain API"}
44+
45+
@app.get("/health")
async def health():
    """Report service health for monitoring and orchestration probes.

    Returns a JSON object with a fixed ``"healthy"`` status, the configured
    application name, and the version declared on the FastAPI app.
    """
    payload = dict(
        status="healthy",
        service=settings.APP_NAME,
        version=app.version,
    )
    return payload

app/repositories/storage_repository.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ async def insert_csv(db: Session, table_name: str, create_table_query: str, colu
2222

2323
# insert the data one by one
2424
for row in data:
25-
INSERT_ROW_QUERY = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({', '.join([f"'{str(cell)}'" for cell in row])})"
25+
values = ', '.join(["'{}'".format(str(cell)) for cell in row])
26+
INSERT_ROW_QUERY = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({values})"
2627
logger.info(f"Insert Row Query: {INSERT_ROW_QUERY}")
2728
db.execute(text(INSERT_ROW_QUERY))
2829
db.commit()

app/services/rag/chunker/chunker_factory.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ def create_chunker(document_type: DocumentType) -> Chunker:
2323
Chunker instance
2424
"""
2525
try:
26-
# TODO: Implement chunker factory based on document type
26+
# MultiLevelChunker works well across all document types.
27+
# Extend here with type-specific chunkers if needed (e.g., CSV row-based).
2728
return MultiLevelChunker()
2829
except Exception as e:
2930
logger.error(f"Failed to create chunker: {e}", exc_info=True)

0 commit comments

Comments
 (0)