PredictiveManish
diff --git a/‎.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎app/app.py‎ ‎app/__init__.py‎app/app.py renamed to app/__init__.py b/‎app/app.py‎ ‎app/__init__.py‎app/app.py renamed to app/__init__.py
diff --git a/‎app/config.py‎
Lines changed: 14 additions & 0 deletions b/‎app/config.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎app/discord_alert.py‎
Lines changed: 12 additions & 2 deletions b/‎app/discord_alert.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎app/drift_detector.py‎
Lines changed: 81 additions & 8 deletions b/‎app/drift_detector.py‎
Lines changed: 81 additions & 8 deletions
diff --git a/‎app/logger.py‎
Lines changed: 5 additions & 6 deletions b/‎app/logger.py‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎app/main.py‎
Lines changed: 121 additions & 5 deletions b/‎app/main.py‎
Lines changed: 121 additions & 5 deletions
diff --git a/‎app/model.pkl‎
169 KB b/‎app/model.pkl‎
169 KB
diff --git a/‎app/model.py‎
Lines changed: 1 addition & 1 deletion b/‎app/model.py‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,7 @@
+__pycache__/
+*.pyc
+*.db
+.env
+.DS_Store
+.pytest_cache/
+reset_db.py
@@ -0,0 +1,14 @@
+# app/config.py
+import os
+
+# Alert threshold for the drift score (default 0.15; override with the ALERT_THRESHOLD env var)
+ALERT_THRESHOLD = float(os.getenv("ALERT_THRESHOLD", 0.15))
+
+# Discord webhook URL
+DISCORD_WEBHOOK = os.getenv("DISCORD_WEBHOOK", "")
+
+# Database path
+DB_PATH = os.getenv("DB_PATH", "predictions.db")
+
+# Model path
+MODEL_PATH = os.path.join(os.path.dirname(__file__), "model.pkl")
@@ -1,9 +1,19 @@
 import os
 import requests
+import logging
 
 WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK", "https://discord.com/api/webhooks/1474862103138402306/UQo03RbfP4LXkxVNUVn_p-ZbLZXnmkBbrbDVJqOynKDbt32pBvi-TuCwrIRZl1_FGFvB")
 
 def send_alert(message):
-    data={"content": message}
-    requests.post(WEBHOOK_URL,json=data)
+    if not WEBHOOK_URL:
+        logging.warning("DISCORD_WEBHOOK not set, skipping alert.")
+        return False
+    try:
+        data = {"content": message}
+        response = requests.post(WEBHOOK_URL, json=data)
+        response.raise_for_status()
+        return True
+    except Exception as e:
+        logging.error(f"Failed to send Discord alert: {e}")
+        
 
@@ -1,19 +1,92 @@
 import numpy as np
-from scipy.stats import ks_2samp
+from scipy.stats import ks_2samp, chi2_contingency
+import logging
 
-def compute_drift_score(baseline, recent):
+def compute_drift_score(baseline, recent, method='psi'):
     """
     Returns a drift score between 0 and 1.
-    Here we use Kolmogorov-Smirnov statistic (p-value complement)
+    
+    Methods:
+    - 'psi': Population Stability Index (best for categorical)
+    - 'ks': Kolmogorov-Smirnov (for continuous)
+    - 'chi2': Chi-square test (for categorical)
     """
     if len(baseline) == 0 or len(recent) == 0:
         return 0.0
-    # If categorical predictions (e.g., classes), use PSI or simple distribution difference
-    # For simplicity, we treat predictions as continuous (class indices) and use KS
-    ks_stat, p_value = ks_2samp(baseline, recent)
-    # Convert p-value to a score: lower p-value means higher drift
-    drift_score = 1 - p_value # ranges 0-1 
+    
+    if method == 'psi':
+        return compute_psi(baseline, recent)
+    elif method == 'chi2':
+        return compute_chi2_drift(baseline, recent)
+    else:
+        # Any other method value falls back to the KS test
+        ks_stat, p_value = ks_2samp(baseline, recent)
+        # Convert to 0-1 scale where higher means more drift
+        drift_score = 1 - p_value
+        return min(max(drift_score, 0), 1)  # Clamp between 0-1
+
+def compute_psi(baseline, recent, bins=3):
+    """
+    Population Stability Index
+    PSI < 0.1: no significant drift
+    PSI 0.1-0.2: moderate drift
+    PSI > 0.2: significant drift
+    """
+    # For categorical predictions (0,1,2), we can use the classes as bins
+    unique_classes = sorted(set(baseline + recent))
+    
+    # If we have at most 3 distinct classes, use them as bins
+    if len(unique_classes) <= 3:
+        # Calculate distributions
+        baseline_counts = np.bincount(baseline, minlength=3)
+        recent_counts = np.bincount(recent, minlength=3)
+        
+        # Convert to percentages
+        baseline_pct = baseline_counts / len(baseline)
+        recent_pct = recent_counts / len(recent)
+        
+        # Clip proportions to [0.001, 0.999] to avoid log(0) and division by zero
+        baseline_pct = np.clip(baseline_pct, 0.001, 0.999)
+        recent_pct = np.clip(recent_pct, 0.001, 0.999)
+        
+        # Calculate PSI
+        psi = np.sum((recent_pct - baseline_pct) * np.log(recent_pct / baseline_pct))
+        
+        # Normalize PSI to 0-1 scale (typical PSI > 0.2 is significant)
+        # So we'll map PSI 0-0.2 to 0-1 scale
+        normalized_psi = min(psi / 0.2, 1.0)
+        
+        logging.info(f"PSI: {psi:.3f}, Normalized: {normalized_psi:.3f}")
+        return normalized_psi
+    
+    else:
+        # Fall back to the KS test when there are more than 3 distinct classes
+        ks_stat, p_value = ks_2samp(baseline, recent)
+        return 1 - p_value
+
+def compute_chi2_drift(baseline, recent):
+    """
+    Chi-square test for categorical drift detection
+    """
+    # Create contingency table
+    unique_classes = sorted(set(baseline + recent))
+    
+    # Count frequencies
+    baseline_counts = [baseline.count(c) for c in unique_classes]
+    recent_counts = [recent.count(c) for c in unique_classes]
+    
+    # Create contingency table
+    contingency = np.array([baseline_counts, recent_counts])
+    
+    # Perform chi-square test
+    chi2, p_value, dof, expected = chi2_contingency(contingency)
+    
+    # Convert p-value to drift score (lower p-value = more drift)
+    drift_score = 1 - p_value
+    
+    logging.info(f"Chi2: {chi2:.3f}, p-value: {p_value:.3f}, Drift score: {drift_score:.3f}")
     return drift_score
 
 def should_alert(score, threshold=0.15):
+    """Determine if drift score exceeds threshold"""
     return score > threshold
@@ -8,17 +8,17 @@ def init_db():
     conn=sqlite3.connect(DB_FILE)
     c=conn.cursor()
     c.execute('''CREATE TABLE IF NOT EXISTS predictions
-              id INTEGER PRIMARY KEY AUTOINCREMENT,
+              (id INTEGER PRIMARY KEY AUTOINCREMENT,
               features TEXT,
-              prediciton INTEGER,
-              timestamp TEXT''')
+              prediction INTEGER,
+              timestamp TEXT)''')
     conn.commit()
     conn.close()
 
 def log_prediction(features, pred, timestamp):
     conn=sqlite3.connect(DB_FILE)
     c=conn.cursor()
-    c.execute("INSERT INTO predictions (features, prediction, timestamp) VALUES (?,?,?)", (json.dumps(features), pred, timestamp.isformat()))
+    c.execute("INSERT INTO predictions (features, prediction, timestamp) VALUES (?,?,?)", (json.dumps(features), pred, timestamp.isoformat()))
     conn.commit()
     conn.close()
 
@@ -31,7 +31,7 @@ def get_recent_predictions(limit=100):
     return [row[0] for row in rows]
 
 
-def initial_get_predictions(limit=100):
+def get_initial_predictions(limit=100):
     # For demo, using first 100 predictions as baseline
     conn=sqlite3.connect(DB_FILE)
     c=conn.cursor()
@@ -40,6 +40,5 @@ def initial_get_predictions(limit=100):
     conn.close()
     return [row[0] for row in rows]
 
-init_db() # ensures table exists
 
 
@@ -1,10 +1,126 @@
 from fastapi import FastAPI, BackgroundTasks, HTTPException
 from pydantic import BaseModel
-import numpy as np
 import pickle
+import numpy as np
+import os
 from datetime import datetime
-from logger import log_prediction, get_recent_predictions
-from drift_detector import compute_drift_score, should_alert
-from discord_alert import send_alert
-import asyncio
+from typing import List, Optional
+import json
+
+# Import your modules
+from app.logger import log_prediction, get_recent_predictions, get_initial_predictions, init_db
+from app.drift_detector import compute_drift_score, should_alert
+from app.discord_alert import send_alert
+from app import config
+
+# Initialize database on startup
+init_db()
+
+app = FastAPI(title="MLOps Drift Detection API")
+
+# Load model
+model_path = os.path.join(os.path.dirname(__file__), "model.pkl")
+if not os.path.exists(model_path):
+    print(f"Warning: Model file not found at {model_path}")
+    model = None
+else:
+    with open(model_path, 'rb') as f:
+        model = pickle.load(f)
+
+class InputData(BaseModel):
+    features: List[float]
+
+class PredictionResponse(BaseModel):
+    prediction: int
+    timestamp: str
+
+class DriftResponse(BaseModel):
+    drift_score: float
+    alert: bool
+    threshold: float
+    recent_samples: int
+    baseline_samples: int
+
+@app.get("/")
+async def root():
+    return {
+        "message": "MLOps Drift Detection API",
+        "status": "operational",
+        "model_loaded": model is not None
+    }
+
+@app.get("/healthz")
+async def healthz():
+    """Health check endpoint for Render"""
+    return {"status": "healthy", "timestamp": datetime.now().isoformat()}
+
+@app.post("/predict", response_model=PredictionResponse)
+async def predict(data: InputData, background_tasks: BackgroundTasks):
+    """Make a prediction and log it for drift detection"""
+    if model is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    
+    # Convert to numpy array and reshape
+    features = np.array(data.features).reshape(1, -1)
+    
+    # Make prediction
+    pred = model.predict(features)[0]
+    
+    # Log prediction asynchronously
+    timestamp = datetime.now()
+    background_tasks.add_task(log_prediction, data.features, int(pred), timestamp)
+    
+    return {
+        "prediction": int(pred),
+        "timestamp": timestamp.isoformat()
+    }
+
+@app.get("/drift_score", response_model=DriftResponse)
+async def get_drift(background_tasks: BackgroundTasks):
+    """Calculate current drift score"""
+    # Get recent predictions (last 100)
+    recent = get_recent_predictions(limit=100)
+    
+    # Get baseline predictions (first 100)
+    baseline = get_initial_predictions(limit=100)
+    
+    if len(recent) < 10 or len(baseline) < 10:
+        return {
+            "drift_score": 0.0,
+            "alert": False,
+            "threshold": config.ALERT_THRESHOLD,
+            "recent_samples": len(recent),
+            "baseline_samples": len(baseline)
+        }
+    
+    # Compute drift score
+    score = compute_drift_score(baseline, recent, method='psi')
+    alert = should_alert(score, config.ALERT_THRESHOLD)
+    
+    # Send alert if needed
+    if alert:
+        background_tasks.add_task(send_alert, f"🚨 Drift detected! Score: {score:.3f}")
+    
+    return {
+        "drift_score": round(score, 3),
+        "alert": alert,
+        "threshold": config.ALERT_THRESHOLD,
+        "recent_samples": len(recent),
+        "baseline_samples": len(baseline)
+    }
 
+@app.get("/stats")
+async def get_stats():
+    """Get prediction statistics"""
+    recent = get_recent_predictions(limit=1000)
+    baseline = get_initial_predictions(limit=100)
+    
+    return {
+        "total_predictions": len(recent),
+        "baseline_size": len(baseline),
+        "recent_distribution": {
+            "0": recent.count(0) if recent else 0,
+            "1": recent.count(1) if recent else 0,
+            "2": recent.count(2) if recent else 0
+        } if recent else {}
+    }
@@ -7,7 +7,7 @@ def train_and_save():
     X,y = iris.data, iris.target
     model=RandomForestClassifier()
     model.fit(X,y)
-    with open('model.pkl', 'wb') as f:
+    with open('app/model.pkl', 'wb') as f:
         pickle.dump(model, f)
     # Also store the training data distribution for drift baseline
     return y