Merged
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -54,7 +54,7 @@ jobs:
run: >
uv run pytest tests/ -v --tb=short
-m "not database and not integration"
--cov=src/neuralnav
--cov=src/planner
--cov-report=xml
--cov-report=term

22 changes: 11 additions & 11 deletions CLAUDE.md
@@ -22,7 +22,7 @@ This repository contains the architecture design for **NeuralNav**, an open-sour
- State machine for workflow orchestration
- Entity-relationship diagrams for data models

- **src/neuralnav/**: Python package (PyPA src layout)
- **src/planner/**: Python package (PyPA src layout)
- **api/**: FastAPI REST API layer
- `app.py`: FastAPI app factory
- `dependencies.py`: Singleton dependency injection
@@ -172,10 +172,10 @@ The recommendation engine uses **multi-criteria scoring** to rank configurations

**Key Files**:

- `src/neuralnav/recommendation/scorer.py` - Calculates 4 scores
- `src/neuralnav/recommendation/quality/usecase_scorer.py` - Artificial Analysis benchmark scoring
- `src/neuralnav/recommendation/analyzer.py` - Generates 5 ranked lists
- `src/neuralnav/recommendation/config_finder.py` - Orchestrates scoring during capacity planning
- `src/planner/recommendation/scorer.py` - Calculates 4 scores
- `src/planner/recommendation/quality/usecase_scorer.py` - Artificial Analysis benchmark scoring
- `src/planner/recommendation/analyzer.py` - Generates 5 ranked lists
- `src/planner/recommendation/config_finder.py` - Orchestrates scoring during capacity planning

## Development Environment

@@ -325,7 +325,7 @@ All API endpoints **must** follow these rules:
- **Health check exception**: `/health` stays at root with no prefix (standard for load balancer probes). This is the only endpoint outside `/api/v1/`.
- **Versioning**: All endpoints are under `/api/v1/`. When a v2 is needed, add new route files with `prefix="/api/v2"`.
- **Naming**: Use kebab-case for multi-word paths (e.g., `/deploy-to-cluster`, `/ranked-recommend-from-spec`).
- **When adding a new route file**: Set `prefix="/api/v1"` on the `APIRouter` and use relative paths in all decorators. Register the router in `src/neuralnav/api/routes/__init__.py` and include it in `src/neuralnav/api/app.py`.
- **When adding a new route file**: Set `prefix="/api/v1"` on the `APIRouter` and use relative paths in all decorators. Register the router in `src/planner/api/routes/__init__.py` and include it in `src/planner/api/app.py`.

### Common Editing Patterns

@@ -346,7 +346,7 @@ All API endpoints **must** follow these rules:
6. Update docs/architecture-diagram.md data model ERD

**Adding a new API endpoint**:
1. Add the route to the appropriate file in `src/neuralnav/api/routes/` (or create a new route file)
1. Add the route to the appropriate file in `src/planner/api/routes/` (or create a new route file)
2. Use a relative path in the decorator (e.g., `@router.get("/my-endpoint")`) — the `/api/v1` prefix comes from the router
3. If creating a new route file, set `APIRouter(prefix="/api/v1")` and register it in `routes/__init__.py` and `app.py`
4. Update `ui/app.py` if the UI calls the new endpoint
@@ -356,7 +356,7 @@ All API endpoints **must** follow these rules:
1. Add numbered section to docs/ARCHITECTURE.md (maintain sequential numbering)
2. Update "Architecture Components" count in Overview
3. Add to docs/architecture-diagram.md component diagram
4. Create corresponding src/neuralnav/<component>/ directory
4. Create corresponding src/planner/<component>/ directory
5. Update sequence diagram if component participates in main flow
6. Update Phase 1 technology choices table if relevant

@@ -439,7 +439,7 @@ The system now supports two deployment modes:
- **Purpose**: GPU-free development and testing on local machines
- **Location**: `simulator/` directory contains the vLLM simulator service
- **Docker Image**: `vllm-simulator:latest` (single image for all models)
- **Configuration**: Set `DeploymentGenerator(simulator_mode=True)` in `src/neuralnav/api/dependencies.py`
- **Configuration**: Set `DeploymentGenerator(simulator_mode=True)` in `src/planner/api/dependencies.py`
- **Benefits**:
- No GPU hardware required
- Fast deployment (~10-15 seconds to Ready)
@@ -449,7 +449,7 @@

### Real vLLM Mode (Production)
- **Purpose**: Actual model inference with GPUs
- **Configuration**: Set `DeploymentGenerator(simulator_mode=False)` in `src/neuralnav/api/dependencies.py`
- **Configuration**: Set `DeploymentGenerator(simulator_mode=False)` in `src/planner/api/dependencies.py`
- **Requirements**:
- GPU-enabled Kubernetes cluster
- NVIDIA GPU Operator installed
@@ -477,7 +477,7 @@ The system now supports two deployment modes:

### Technical Details

The deployment template (`src/neuralnav/configuration/templates/kserve-inferenceservice.yaml.j2`) uses Jinja2 conditionals:
The deployment template (`src/planner/configuration/templates/kserve-inferenceservice.yaml.j2`) uses Jinja2 conditionals:
- `{% if simulator_mode %}` - Uses `vllm-simulator:latest`, no GPU resources, fast health checks
- `{% else %}` - Uses `vllm/vllm-openai:v0.6.2`, requests GPUs, longer health checks
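The branching described above can be sketched with a simplified stand-in template. This is not the real `kserve-inferenceservice.yaml.j2`, only the image names and the `simulator_mode` flag are taken from the text; the single-field template is illustrative.

```python
from jinja2 import Template

# Simplified stand-in for the real Jinja2 template: one conditional field.
template = Template(
    "image: {% if simulator_mode %}vllm-simulator:latest"
    "{% else %}vllm/vllm-openai:v0.6.2{% endif %}"
)

print(template.render(simulator_mode=True))   # image: vllm-simulator:latest
print(template.render(simulator_mode=False))  # image: vllm/vllm-openai:v0.6.2
```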

2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -288,7 +288,7 @@ make format

### Writing Tests

- Test files should mirror source structure: `src/neuralnav/foo/bar.py` → `tests/test_foo_bar.py`
- Test files should mirror source structure: `src/planner/foo/bar.py` → `tests/test_foo_bar.py`
- Use descriptive test names: `test_plan_capacity_with_minimum_accuracy_threshold()`
- Include both positive and negative test cases
- Mock external dependencies (databases, APIs, LLM calls)
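The conventions above (descriptive name, positive and negative cases, mocked dependencies) can be sketched in one test. This is a hypothetical example: `query_benchmarks` and the accuracy-threshold logic are stand-ins, not real project APIs.

```python
from unittest import mock


def test_plan_capacity_with_minimum_accuracy_threshold():
    # Mock the external dependency (here, a benchmark repository).
    fake_repo = mock.Mock()
    fake_repo.query_benchmarks.return_value = [{"model": "m", "accuracy": 82}]

    # Positive case: a config above the threshold is kept.
    kept = [r for r in fake_repo.query_benchmarks() if r["accuracy"] >= 80]
    assert len(kept) == 1

    # Negative case: nothing survives an impossible threshold.
    assert [r for r in fake_repo.query_benchmarks() if r["accuracy"] >= 101] == []
```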
2 changes: 1 addition & 1 deletion Dockerfile
@@ -21,7 +21,7 @@ COPY pyproject.toml uv.lock README.md ./
RUN uv sync --frozen --no-dev --extra cluster

# Copy backend source code
COPY src/neuralnav ./src/neuralnav
COPY src/planner ./src/planner

# Copy data files (Knowledge Base)
COPY data ./data
11 changes: 6 additions & 5 deletions Makefile
@@ -57,6 +57,7 @@ PGDUMP_OUTPUT ?= data/benchmarks/performance/benchmarks_GuideLLM.json

SRC_DIR := src
UI_DIR := ui
TEST_DIR := tests
SIMULATOR_DIR := simulator

VENV := .venv
@@ -191,7 +192,7 @@ start-backend: ## Start FastAPI backend
@if [ -f $(BACKEND_PID) ] && [ -s $(BACKEND_PID) ] && kill -0 $$(cat $(BACKEND_PID) 2>/dev/null) 2>/dev/null; then \
printf "$(YELLOW)Backend already running (PID: $$(cat $(BACKEND_PID)))$(NC)\n"; \
else \
( PYTHONPATH=src uv run uvicorn neuralnav.api.app:app --reload --host 0.0.0.0 --port 8000 > $(LOG_DIR)/backend.log 2>&1 & echo $$! > $(BACKEND_PID) ); \
( PYTHONPATH=src uv run uvicorn planner.api.app:app --reload --host 0.0.0.0 --port 8000 > $(LOG_DIR)/backend.log 2>&1 & echo $$! > $(BACKEND_PID) ); \
sleep 2; \
printf "$(GREEN)✓ Backend started (PID: $$(cat $(BACKEND_PID)))$(NC)\n"; \
fi
@@ -220,12 +221,12 @@ stop: ## Stop Backend + UI (leaves Ollama and DB running)
fi
@# Kill any remaining NeuralNav processes by pattern matching
@pkill -f "streamlit run ui/app.py" 2>/dev/null || true
@pkill -f "uvicorn neuralnav.api.app:app" 2>/dev/null || true
@pkill -f "uvicorn planner.api.app:app" 2>/dev/null || true
@# Give processes time to exit gracefully
@sleep 1
@# Force kill if still running
@pkill -9 -f "streamlit run ui/app.py" 2>/dev/null || true
@pkill -9 -f "uvicorn neuralnav.api.app:app" 2>/dev/null || true
@pkill -9 -f "uvicorn planner.api.app:app" 2>/dev/null || true
@printf "$(GREEN)✓ All NeuralNav services stopped$(NC)\n"
@# Don't stop Ollama or DB as they might be used by other apps/tools
@if [ "$(MAKECMDGOALS)" != "stop-all" ]; then \
@@ -538,12 +539,12 @@ test-integration: setup-ollama ## Run integration tests (requires Ollama and DB)

lint: ## Run linters
@printf "$(BLUE)Running linters...$(NC)\n"
@if uv run ruff --version >/dev/null 2>&1; then uv run ruff check $(SRC_DIR)/ $(UI_DIR)/; else printf "$(YELLOW)ruff not installed, skipping$(NC)\n"; fi
@if uv run ruff --version >/dev/null 2>&1; then uv run ruff check $(SRC_DIR)/ $(TEST_DIR)/ $(UI_DIR)/; else printf "$(YELLOW)ruff not installed, skipping$(NC)\n"; fi
@printf "$(GREEN)✓ Linting complete$(NC)\n"

format: ## Auto-format code
@printf "$(BLUE)Formatting code...$(NC)\n"
@if uv run ruff --version >/dev/null 2>&1; then uv run ruff format $(SRC_DIR)/ $(UI_DIR)/; else printf "$(YELLOW)ruff not installed, skipping$(NC)\n"; fi
@if uv run ruff --version >/dev/null 2>&1; then uv run ruff format $(SRC_DIR)/ $(TEST_DIR)/ $(UI_DIR)/; else printf "$(YELLOW)ruff not installed, skipping$(NC)\n"; fi
@printf "$(GREEN)✓ Formatting complete$(NC)\n"

typecheck: ## Run typecheck
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -74,7 +74,7 @@ services:
CORS_ORIGINS: http://localhost:8501,http://ui:8501
volumes:
# Mount source code for development (hot reload)
- ./src/neuralnav:/app/src/neuralnav
- ./src/planner:/app/src/planner
- ./data:/app/data
- ./generated_configs:/app/generated_configs
- ./logs:/app/logs
12 changes: 6 additions & 6 deletions docs/DEVELOPER_GUIDE.md
@@ -326,7 +326,7 @@ curl -X POST http://localhost:8000/api/v1/db/reset

All loading methods are append-mode — duplicates (same model/hardware/traffic/load config) are silently skipped via `ON CONFLICT (config_id) DO NOTHING`.

**Core loading logic** lives in `src/neuralnav/knowledge_base/loader.py` and is shared by the CLI script, API endpoints, and UI.
**Core loading logic** lives in `src/planner/knowledge_base/loader.py` and is shared by the CLI script, API endpoints, and UI.
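The append-mode behaviour described above can be sketched with SQLite as a stand-in for PostgreSQL (SQLite also supports `ON CONFLICT ... DO NOTHING`, in versions ≥ 3.24). The table and columns here are simplified, not the real `exported_summaries` schema.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE exported_summaries (config_id TEXT PRIMARY KEY, model TEXT)"
)

# The second row repeats config_id "cfg-1" and is silently skipped.
rows = [("cfg-1", "llama"), ("cfg-1", "llama"), ("cfg-2", "mistral")]
conn.executemany(
    "INSERT INTO exported_summaries VALUES (?, ?) "
    "ON CONFLICT (config_id) DO NOTHING",
    rows,
)
count = conn.execute("SELECT COUNT(*) FROM exported_summaries").fetchone()[0]
print(count)  # 2
```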

### Cluster Development

@@ -578,7 +578,7 @@ curl http://localhost:8080/health
}
```

2. Update `src/neuralnav/intent_extraction/extractor.py` USE_CASE_MAP
2. Update `src/planner/intent_extraction/extractor.py` USE_CASE_MAP
3. Restart backend

### Modifying the UI
@@ -593,14 +593,14 @@ UI code is in `ui/app.py`. Changes auto-reload in the browser.

### Modifying the Recommendation Algorithm

**Model scoring:** `src/neuralnav/recommendation/scorer.py`
**Model scoring:** `src/planner/recommendation/scorer.py`
- `Scorer` class - Adjust scoring weights

**Capacity planning:** `src/neuralnav/recommendation/config_finder.py`
**Capacity planning:** `src/planner/recommendation/config_finder.py`
- `plan_capacity()` - GPU sizing logic
- `_calculate_required_replicas()` - Scaling calculations

**Traffic profiling:** `src/neuralnav/specification/traffic_profile.py`
**Traffic profiling:** `src/planner/specification/traffic_profile.py`
- `generate_profile()` - Traffic estimation
- `generate_slo_targets()` - SLO target generation

@@ -986,7 +986,7 @@ Once deployed:
To use real vLLM with actual GPUs (requires GPU-enabled cluster):

```python
# In src/neuralnav/api/routes.py
# In src/planner/api/routes.py
deployment_generator = DeploymentGenerator(simulator_mode=False)
```

4 changes: 2 additions & 2 deletions docs/DOCKER.md
@@ -272,7 +272,7 @@ docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
make docker-up-dev
```

Edit files in `src/neuralnav/` or `ui/` - changes will be reflected immediately.
Edit files in `src/planner/` or `ui/` - changes will be reflected immediately.

### Debugging

@@ -350,7 +350,7 @@ All services communicate via a bridge network (`neuralnav-network`):
### Volume Mounts

**Development Volumes** (source code - hot reload):
- `./src/neuralnav` → `/app/src/neuralnav`
- `./src/planner` → `/app/src/planner`
- `./ui` → `/app/ui`
- `./data` → `/app/data`

6 changes: 3 additions & 3 deletions docs/MIGRATION_PLAN.md
@@ -16,7 +16,7 @@ Config Explorer has 2 functional areas: **Capacity Planner** (memory/KV cache ca
Config Explorer modules are **flattened** as peers alongside existing modules (not nested under a `config_explorer/` sub-package). A shared `cli/` package holds all CLI entry points.

```
src/planner/                          # renamed from src/neuralnav/
├── __init__.py
├── api/ # (unchanged internal structure)
├── capacity_planner.py # NEW: from config_explorer (memory/KV cache calculations)
@@ -193,11 +193,11 @@ elif view == "Config Explorer":

## 5. Step-by-Step Migration Plan

### Step 1: Rename `src/neuralnav/` → `src/planner/`

**Files to modify:**

- `mv src/neuralnav src/planner`
- Update ALL `from neuralnav.` → `from planner.` imports in:
- `src/planner/**/*.py` (17+ source files)
- `tests/*.py` (conftest.py, 3 test files)
10 changes: 5 additions & 5 deletions docs/NeuralNav Solution Ranking.md
@@ -158,11 +158,11 @@ Configurations within 20% of SLO targets are included with clear warnings:

| Component | Location | Responsibility |
|-----------|----------|----------------|
| `Scorer` | `src/neuralnav/recommendation/scorer.py` | Calculate 4 scores (0-100 scale) |
| `UseCaseQualityScorer` | `src/neuralnav/recommendation/quality/usecase_scorer.py` | Accuracy scoring based on use case fit |
| `ConfigFinder.plan_all_capacities()` | `src/neuralnav/recommendation/config_finder.py` | Query benchmarks, score all configs |
| `Analyzer` | `src/neuralnav/recommendation/analyzer.py` | Filter and sort into 5 ranked lists |
| `RecommendationWorkflow` | `src/neuralnav/orchestration/workflow.py` | Orchestrate end-to-end flow |
| `Scorer` | `src/planner/recommendation/scorer.py` | Calculate 4 scores (0-100 scale) |
| `UseCaseQualityScorer` | `src/planner/recommendation/quality/usecase_scorer.py` | Accuracy scoring based on use case fit |
| `ConfigFinder.plan_all_capacities()` | `src/planner/recommendation/config_finder.py` | Query benchmarks, score all configs |
| `Analyzer` | `src/planner/recommendation/analyzer.py` | Filter and sort into 5 ranked lists |
| `RecommendationWorkflow` | `src/planner/orchestration/workflow.py` | Orchestrate end-to-end flow |

#### Data Flow

18 changes: 9 additions & 9 deletions docs/Recommendation Flow.md
@@ -33,15 +33,15 @@ Return best recommendation or all ranked lists
| `POST /api/v1/re-recommend` | Re-run with edited specs | Single best config |
| `POST /api/v1/regenerate-and-recommend` | Regenerate profile from intent | Single best config |

**Entry Point**: [src/neuralnav/api/routes/](../src/neuralnav/api/routes/)
**Entry Point**: [src/planner/api/routes/](../src/planner/api/routes/)

---

## Step-by-Step Flow

### Step 1: Intent Extraction

**File**: [src/neuralnav/intent_extraction/extractor.py](../src/neuralnav/intent_extraction/extractor.py)
**File**: [src/planner/intent_extraction/extractor.py](../src/planner/intent_extraction/extractor.py)

The `IntentExtractor` uses an LLM (Ollama qwen2.5:7b) to parse the user's natural language request into structured deployment intent.

@@ -65,7 +65,7 @@ intent = intent_extractor.infer_missing_fields(intent)

### Step 2: Traffic Profile Generation

**File**: [src/neuralnav/specification/traffic_profile.py](../src/neuralnav/specification/traffic_profile.py)
**File**: [src/planner/specification/traffic_profile.py](../src/planner/specification/traffic_profile.py)

The `TrafficProfileGenerator` maps the use case to a GuideLLM traffic profile and calculates SLO targets.
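The mapping described above can be sketched as a lookup plus a derivation rule. Everything in this snippet is illustrative: the use-case names, token counts, and the TTFT rule are invented for the example, and the real values live in `generate_profile()` / `generate_slo_targets()`.

```python
# Hypothetical use-case → traffic-profile table (values are made up).
USE_CASE_PROFILES = {
    "chatbot": {"prompt_tokens": 512, "output_tokens": 256},
    "summarization": {"prompt_tokens": 4096, "output_tokens": 512},
}


def generate_slo_targets(use_case: str) -> dict:
    profile = USE_CASE_PROFILES[use_case]
    # Hypothetical rule: short-output (interactive) use cases get a
    # tighter time-to-first-token target.
    ttft_ms = 500 if profile["output_tokens"] <= 256 else 2000
    return {"ttft_ms": ttft_ms, "profile": profile}


print(generate_slo_targets("chatbot")["ttft_ms"])  # 500
```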

@@ -100,7 +100,7 @@ slo_targets = traffic_generator.generate_slo_targets(intent)

### Step 3: Benchmark Query (PostgreSQL)

**File**: [src/neuralnav/knowledge_base/benchmarks.py](../src/neuralnav/knowledge_base/benchmarks.py)
**File**: [src/planner/knowledge_base/benchmarks.py](../src/planner/knowledge_base/benchmarks.py)

The `BenchmarkRepository` queries PostgreSQL for all (model, GPU, tensor_parallel) configurations that meet SLO targets for the traffic profile.

@@ -122,7 +122,7 @@

### Step 4: Capacity Planning and Scoring

**File**: [src/neuralnav/recommendation/config_finder.py](../src/neuralnav/recommendation/config_finder.py)
**File**: [src/planner/recommendation/config_finder.py](../src/planner/recommendation/config_finder.py)

The `ConfigFinder.plan_all_capacities()` method processes each benchmark configuration and calculates four scores.

@@ -157,8 +157,8 @@ all_configs = capacity_planner.plan_all_capacities(
### Step 5: Multi-Criteria Scoring

**Files**:
- [src/neuralnav/recommendation/scorer.py](../src/neuralnav/recommendation/scorer.py) - Calculates 4 scores
- [src/neuralnav/recommendation/quality/usecase_scorer.py](../src/neuralnav/recommendation/quality/usecase_scorer.py) - Benchmark-based quality scoring
- [src/planner/recommendation/scorer.py](../src/planner/recommendation/scorer.py) - Calculates 4 scores
- [src/planner/recommendation/quality/usecase_scorer.py](../src/planner/recommendation/quality/usecase_scorer.py) - Benchmark-based quality scoring

#### 5.1 Accuracy Score (0-100)

Expand Down Expand Up @@ -245,7 +245,7 @@ Custom weights can be provided via API (0-10 scale, normalized to percentages).

### Step 6: Ranking and Filtering

**File**: [src/neuralnav/recommendation/analyzer.py](../src/neuralnav/recommendation/analyzer.py)
**File**: [src/planner/recommendation/analyzer.py](../src/planner/recommendation/analyzer.py)

The `Analyzer` generates 5 ranked lists from scored configurations.

@@ -281,7 +281,7 @@ ranked_lists = ranking_service.generate_ranked_lists(

### Step 7: Response Generation

**File**: [src/neuralnav/orchestration/workflow.py](../src/neuralnav/orchestration/workflow.py)
**File**: [src/planner/orchestration/workflow.py](../src/planner/orchestration/workflow.py)

The `RecommendationWorkflow` orchestrates all steps and returns the appropriate response.

6 changes: 3 additions & 3 deletions pyproject.toml
@@ -1,5 +1,5 @@
[project]
name = "neuralnav"
name = "planner"
version = "0.1.0"
description = "Confidently navigate LLM deployments from concept to production"
readme = "README.md"
@@ -47,7 +47,7 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/neuralnav"]
packages = ["src/planner"]

[tool.ruff]
# Set the maximum line length
@@ -115,7 +115,7 @@ line-ending = "auto"

[tool.ruff.lint.isort]
# Organize imports into sections
known-first-party = ["neuralnav"]
known-first-party = ["planner"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]

[tool.mypy]
4 changes: 2 additions & 2 deletions scripts/load_benchmarks.py
@@ -5,7 +5,7 @@
This script reads benchmark data from a JSON file
and inserts it into the PostgreSQL exported_summaries table.

Core loading logic lives in neuralnav.knowledge_base.loader and is
Core loading logic lives in planner.knowledge_base.loader and is
shared with the /api/v1/db/* API endpoints.
"""

@@ -16,7 +16,7 @@

import psycopg2

from neuralnav.knowledge_base.loader import insert_benchmarks
from planner.knowledge_base.loader import insert_benchmarks


def get_db_connection():
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/neuralnav/api/app.py → src/planner/api/app.py
@@ -8,7 +8,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from neuralnav.api.routes import (
from planner.api.routes import (
configuration_router,
database_router,
health_router,
@@ -32,7 +32,7 @@
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Initialize all singletons on app.state during startup."""
from neuralnav.api.dependencies import init_app_state
from planner.api.dependencies import init_app_state

logger.info("Initializing app state...")
try: