diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..5430430
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,22 @@
+# Milvus Vector Database
+MILVUS_HOST=localhost
+MILVUS_PORT=19530
+MILVUS_COLLECTION=docs_rag
+MILVUS_VECTOR_FIELD=vector
+
+# LLM Configuration
+# For local development with Ollama:
+KSERVE_URL=http://localhost:11434/v1/chat/completions
+MODEL=llama3.1:8b
+# For production with KServe:
+# KSERVE_URL=http://llama.docs-agent.svc.cluster.local/openai/v1/chat/completions
+# MODEL=llama3.1-8B
+
+# Embedding Model
+EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
+
+# API Server
+PORT=8000
+
+# GitHub Token (optional - increases API rate limit)
+# GITHUB_TOKEN=ghp_your_token_here
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b161c7b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+FROM python:3.11-slim
+
+# Create an unprivileged user (UID 1000) for the container to run as
+RUN useradd -m -u 1000 appuser
+WORKDIR /app
+
+# Install the API server's dependencies before copying source, so this layer is reused when only code changes
+COPY server-https/requirements.txt ./requirements-server.txt
+RUN pip install --no-cache-dir -r requirements-server.txt
+
+# Install the extra packages imported by scripts/local_ingest.py
+RUN pip install --no-cache-dir requests langchain-text-splitters
+
+# Default value for PORT (same number as the EXPOSEd port below)
+ENV PORT=8000
+
+# Drop root privileges; the files copied below remain root-owned (COPY without --chown)
+USER appuser
+
+# Copy the server entry point and the local ingestion script into /app
+COPY server-https/app.py /app/
+COPY scripts/local_ingest.py /app/
+
+EXPOSE 8000
+CMD ["python", "-u", "app.py"]
diff --git a/README.md b/README.md
index b387d19..a3ce5cd 100644
--- a/README.md
+++ b/README.md
@@ -9,11 +9,13 @@ The official LLM implementation of the Kubeflow Documentation Assistant powered
 - [Overview](#overview)
 - [Architecture](#architecture)
 - [Prerequisites](#prerequisites)
-- [Installation](#installation)
+- [Local Development Setup](#local-development-setup)
+- [Installation (Kubernetes)](#installation)
   - [Milvus Vector Database](#milvus-vector-database)
   - [KServe Inference Service](#kserve-inference-service)
   - [Kubeflow Pipelines](#kubeflow-pipelines)
   - [API Server](#api-server)
+- [Local vs Production](#local-vs-production)
 - [Usage](#usage)
 - [Configuration](#configuration)
 - [Troubleshooting](#troubleshooting)
@@ -58,8 +60,97 @@ Kubeflow users often struggle to find relevant information across the extensive
 - GPU nodes (for LLM inference)
 - SSL certificate (for HTTPS API)
 
+## Local Development Setup
+
+For local development without a Kubernetes cluster, you can use Docker Compose to run the full stack locally.
+
+### Prerequisites
+
+- [Docker Desktop](https://www.docker.com/products/docker-desktop/)
+- [Ollama](https://ollama.ai/) (for local LLM inference)
+
+### 1. Install Ollama
+
+Ollama runs the Llama 3.1 model locally on your machine.
+
+**macOS:**
+```bash
+brew install ollama
+```
+
+**Linux:**
+```bash
+curl -fsSL https://ollama.com/install.sh | sh
+```
+
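+Start the Ollama server, then pull the model from a second terminal (`ollama serve` keeps running in the foreground; skip it if Ollama is already running as a background service):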
+```bash
+ollama serve
+ollama pull llama3.1:8b
+```
+
+### 2. Clone and Configure
+
+```bash
+git clone https://github.com/kubeflow/docs-agent.git
+cd docs-agent
+cp .env.example .env
+```
+
+### 3. Start All Services
+
+```bash
+docker-compose up --build -d
+```
+
+This starts:
+- **Milvus** — vector database (port `19530`)
+- **etcd** — metadata storage for Milvus
+- **MinIO** — object storage for Milvus
+- **API Server** — FastAPI server (port `8000`) with hot reload
+
+> **Note:** The `--build` flag is only needed on the first run or after changing the Dockerfile/requirements. For subsequent runs, use `docker-compose up -d`.
+
+Verify all services are running:
+```bash
+docker-compose ps
+```
+
+### 4. Ingest Documentation
+
+Populate Milvus with Kubeflow documentation:
+```bash
+docker exec docs-agent-api python local_ingest.py
+```
+
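+This is a one-time step; re-run it only when the documentation is updated. Each run drops and recreates the collection, and by default ingests at most 20 Markdown files.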
+
+### 5. Test
+
+```bash
+curl -X POST http://localhost:8000/chat \
+  -H "Content-Type: application/json" \
+  -d '{"message": "What is Kubeflow?", "stream": false}'
+```
+
+### Docker Compose Commands
+
+| Command | Description |
+|---------|-------------|
+| `docker-compose up -d` | Start all services in background |
+| `docker-compose up --build -d` | Rebuild and start (after Dockerfile/requirements change) |
+| `docker-compose ps` | Check service status |
+| `docker-compose logs -f` | View logs |
+| `docker-compose logs -f api-server` | View API server logs only |
+| `docker-compose down` | Stop all services |
+| `docker-compose down -v` | Stop and remove all data |
+
+---
+
 ## Installation
 
+The following instructions are for deploying to a **Kubernetes cluster** in production.
+
 ### Milvus Vector Database
 
 #### What is Milvus?
@@ -536,6 +627,19 @@ if data.get('citations'):
     print(f"Sources: {data['citations']}")
 ```
 
+## Local vs Production
+
+| Component | Local Development | Production (Kubernetes) |
+|-----------|------------------|------------------------|
+| **Vector DB** | Docker Compose (Milvus standalone) | Helm chart on Kubernetes |
+| **LLM** | Ollama (runs on CPU/Apple Silicon) | KServe + vLLM (NVIDIA GPU) |
+| **Data Ingestion** | `scripts/local_ingest.py` | Kubeflow Pipelines |
+| **API Server** | Docker Compose (`uvicorn app:app --reload`) | Kubernetes Deployment |
+| **Service Mesh** | Not needed | Istio (mTLS + RBAC) |
+| **SSL** | Plain HTTP | HTTPS with certificates |
+
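+Both setups expose the same API format. Answers can differ, because the local setup runs the model through Ollama instead of KServe/vLLM and, by default, ingests only a small sample of the documentation (at most 20 Markdown files). The local setup uses lightweight alternatives that don't require Kubernetes or GPUs.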
+
 ## Configuration
 
 ### Environment Variables
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..094f60f
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,98 @@
+# Local development stack: Milvus standalone (with etcd + MinIO) and the API server.
+services:
+  # Metadata storage for Milvus.
+  etcd:
+    image: quay.io/coreos/etcd:v3.5.18
+    container_name: milvus-etcd
+    environment:
+      - ETCD_AUTO_COMPACTION_MODE=revision
+      - ETCD_AUTO_COMPACTION_RETENTION=1000
+      - ETCD_QUOTA_BACKEND_BYTES=4294967296
+      - ETCD_SNAPSHOT_COUNT=50000
+    volumes:
+      - etcd_data:/etcd
+    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+    healthcheck:
+      test: ["CMD", "etcdctl", "endpoint", "health"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  # Object storage for Milvus.
+  minio:
+    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
+    container_name: milvus-minio
+    environment:
+      MINIO_ACCESS_KEY: minioadmin
+      MINIO_SECRET_KEY: minioadmin
+    ports:
+      - "9001:9001"
+    volumes:
+      - minio_data:/minio_data
+    command: minio server /minio_data --console-address ":9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  # Vector database; started only after etcd and MinIO report healthy.
+  milvus:
+    image: milvusdb/milvus:v2.4.17
+    container_name: milvus-standalone
+    environment:
+      ETCD_ENDPOINTS: etcd:2379
+      MINIO_ADDRESS: minio:9000
+    ports:
+      - "19530:19530"
+      - "9091:9091"
+    volumes:
+      - milvus_data:/var/lib/milvus
+    command: ["milvus", "run", "standalone"]
+    depends_on:
+      etcd:
+        condition: service_healthy
+      minio:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
+      interval: 30s
+      start_period: 90s
+      timeout: 20s
+      retries: 3
+
+  # API server built from ./Dockerfile; the source files are bind-mounted so uvicorn --reload picks up edits.
+  api-server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: docs-agent-api
+    ports:
+      - "8000:8000"
+    environment:
+      MILVUS_HOST: milvus
+      MILVUS_PORT: "19530"
+      MILVUS_COLLECTION: docs_rag
+      MILVUS_VECTOR_FIELD: vector
+      KSERVE_URL: http://host.docker.internal:11434/v1/chat/completions
+      MODEL: llama3.1:8b
+      EMBEDDING_MODEL: sentence-transformers/all-mpnet-base-v2
+      PORT: "8000"
+      # Optional; taken from the shell or .env so local_ingest.py can make authenticated GitHub requests.
+      GITHUB_TOKEN: ${GITHUB_TOKEN:-}
+    # Docker Desktop resolves host.docker.internal on its own; plain Docker Engine on Linux needs this mapping.
+    # On Linux, Ollama must also listen on a non-loopback address (e.g. OLLAMA_HOST=0.0.0.0) to be reachable.
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./server-https/app.py:/app/app.py
+      - ./scripts/local_ingest.py:/app/local_ingest.py
+    command: ["python", "-u", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
+    depends_on:
+      milvus:
+        condition: service_healthy
+
+volumes:
+  etcd_data:
+  minio_data:
+  milvus_data:
diff --git a/scripts/local_ingest.py b/scripts/local_ingest.py
new file mode 100644
index 0000000..8c71e1e
--- /dev/null
+++ b/scripts/local_ingest.py
@@ -0,0 +1,178 @@
+"""
+Local data ingestion script for development.
+
+This replicates what the Kubeflow Pipeline does, but runs locally.
+In production, use the KFP pipeline (pipelines/kubeflow-pipeline.py) instead.
+
+Usage:
+    set -a; source .env; set +a   # export the values so this script can read them
+    python scripts/local_ingest.py
+"""
+
+import os
+import re
+import json
+import base64
+import requests
+from datetime import datetime
+from sentence_transformers import SentenceTransformer
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from pymilvus import connections, utility, FieldSchema, CollectionSchema, DataType, Collection
+
+
+def connect_milvus():
+    # The target comes from MILVUS_HOST / MILVUS_PORT, falling back to localhost:19530.
+    milvus_host, milvus_port = os.getenv("MILVUS_HOST", "localhost"), os.getenv("MILVUS_PORT", "19530")
+    connections.connect("default", host=milvus_host, port=milvus_port)
+    print(f"Connected to Milvus at {milvus_host}:{milvus_port}")
+
+
+def create_collection(name):
+    # Always rebuild from scratch: an existing collection with this name is dropped first.
+    if utility.has_collection(name):
+        Collection(name).drop()
+        print(f"Dropped existing collection: {name}")
+    int64, varchar = DataType.INT64, DataType.VARCHAR
+    schema = CollectionSchema([
+        FieldSchema(name="id", dtype=int64, is_primary=True, auto_id=True),
+        FieldSchema(name="file_unique_id", dtype=varchar, max_length=512),
+        FieldSchema(name="repo_name", dtype=varchar, max_length=256),
+        FieldSchema(name="file_path", dtype=varchar, max_length=512),
+        FieldSchema(name="file_name", dtype=varchar, max_length=256),
+        FieldSchema(name="citation_url", dtype=varchar, max_length=1024),
+        FieldSchema(name="chunk_index", dtype=int64),
+        FieldSchema(name="content_text", dtype=varchar, max_length=2000),
+        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=768),
+        FieldSchema(name="last_updated", dtype=int64),
+    ], "RAG collection for documentation")
+    new_collection = Collection(name, schema)
+    print(f"Created collection: {name}")
+    return new_collection
+
+
+def fetch_docs(repo_owner, repo_name, directory_path, token=None, max_files=20):
+    """Recursively collect up to max_files Markdown files under directory_path via the GitHub contents API; returns a list of dicts with "path", "content" and "file_name"."""
+    headers = {"Authorization": f"token {token}"} if token else {}  # unauthenticated when token is empty/None
+    files = []
+    def fetch_recursive(path):
+        if len(files) >= max_files:
+            return
+        api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{path}"
+        # Bound every request with a timeout so a stalled connection cannot hang the ingestion forever.
+        response = requests.get(api_url, headers=headers, timeout=30)
+        response.raise_for_status()  # a failed directory listing aborts the whole fetch
+        items = response.json()
+        for item in items:
+            if len(files) >= max_files:
+                return
+            if item["type"] == "file" and item["name"].endswith(".md"):
+                try:
+                    file_resp = requests.get(item["url"], headers=headers, timeout=30)
+                    file_resp.raise_for_status()
+                    content = base64.b64decode(file_resp.json()["content"]).decode("utf-8")
+                    files.append({
+                        "path": item["path"],
+                        "content": content,
+                        "file_name": item["name"],
+                    })
+                    print(f"  Fetched: {item['path']}")
+                except Exception as e:  # a single bad file is reported and skipped
+                    print(f"  Error fetching {item['path']}: {e}")
+            elif item["type"] == "dir":
+                fetch_recursive(item["path"])
+
+    fetch_recursive(directory_path)
+    print(f"Fetched {len(files)} files from GitHub")
+    return files
+
+
+def clean_content(content):
+    """Strip front matter, Hugo shortcodes, HTML comments/tags and URLs from Markdown, keep link text, and collapse whitespace."""
+    content = re.sub(r"^\s*[+-]{3,}.*?[+-]{3,}\s*", "", content, flags=re.DOTALL | re.MULTILINE)
+    content = re.sub(r"\{\{.*?\}\}", "", content, flags=re.DOTALL)
+    content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)
+    content = re.sub(r"<[^>]+>", " ", content)
+    content = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", content)  # unwrap links BEFORE URL stripping (which would eat the ")"); "*" also covers targets emptied by shortcode removal
+    content = re.sub(r"https?://[^\s]+", "", content)
+    return re.sub(r"\s+", " ", content).strip()
+
+
+def chunk_and_embed(files, model, repo_name, base_url, chunk_size=1000, chunk_overlap=100):
+    """Clean each file, split it into overlapping chunks, embed every chunk and return one record dict per chunk."""
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        separators=["\n\n", "\n", ". ", " ", ""],
+    )
+    rows = []
+    ingested_at = int(datetime.now().timestamp())  # one timestamp shared by every record of this run
+
+    for doc in files:
+        text = clean_content(doc["content"])
+        if len(text) < 50:
+            print(f"  Skipping (too short): {doc['file_name']}")
+            continue
+        pieces = text_splitter.split_text(text)
+        for position, piece in enumerate(pieces):
+            # NOTE(review): citation_url is built from the file name only; directory components of the path are not included - confirm this matches the site's URLs.
+            rows.append({
+                "file_unique_id": f"{repo_name}:{doc['path']}",
+                "repo_name": repo_name,
+                "file_path": doc["path"],
+                "file_name": doc["file_name"],
+                "citation_url": f"{base_url}/{doc['file_name'].replace('.md', '')}"[:1024],
+                "chunk_index": position,
+                "content_text": piece[:2000],
+                "vector": model.encode(piece).tolist(),
+                "last_updated": ingested_at,
+            })
+        print(f"  Chunked: {doc['file_name']} -> {len(pieces)} chunks")
+
+    print(f"Total: {len(rows)} vectors")
+    return rows
+
+
+def store_vectors(collection, records):
+    """Insert records in batches of 1000, flush, build a COSINE IVF_FLAT index on "vector" and load the collection."""
+    if not records:
+        print("No records to store")
+        return
+
+    total = len(records)
+    # Insert in slices of 1000 records.
+    for start in range(0, total, 1000):
+        collection.insert(records[start : start + 1000])
+
+    collection.flush()
+    # Index the "vector" field, then load the collection.
+    index_params = {"metric_type": "COSINE", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
+    collection.create_index("vector", index_params)
+    collection.load()
+    print(f"Stored {total} vectors in Milvus")
+
+
+def main():
+    """Run the ingestion end to end: recreate the collection, fetch the docs, embed them and store the vectors."""
+    target_name = os.getenv("MILVUS_COLLECTION", "docs_rag")
+    model_name = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-mpnet-base-v2")
+
+    connect_milvus()
+    target = create_collection(target_name)
+
+    print("\nLoading embedding model...")
+    encoder = SentenceTransformer(model_name)
+
+    print("\nFetching docs from GitHub...")
+    # An unset or empty GITHUB_TOKEN results in unauthenticated requests.
+    docs = fetch_docs("kubeflow", "website", "content/en/docs", os.getenv("GITHUB_TOKEN", ""))
+
+    print("\nChunking and embedding...")
+    rows = chunk_and_embed(docs, encoder, "website", "https://www.kubeflow.org/docs")
+
+    print("\nStoring in Milvus...")
+    store_vectors(target, rows)
+    print("\nDone!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/server-https/Dockerfile b/server-https/Dockerfile
index 2212413..499f104 100644
--- a/server-https/Dockerfile
+++ b/server-https/Dockerfile
@@ -1,4 +1,4 @@
-    FROM python:3.11-slim
+FROM python:3.11-slim
 
 # Create non-root user
 RUN useradd -m -u 1000 appuser