cactus-compute · HenryNdubuaku · May 29, 2026 · May 27, 2026
diff --git a/python/cactus/cli/__init__.py b/python/cactus/cli/__init__.py
@@ -10,6 +10,7 @@
 from .download import cmd_download
 from .compile import cmd_build
 from .run import cmd_run
+from .serve import cmd_serve
 from .transcribe import cmd_transcribe
 from .test import cmd_test
 from .convert import cmd_convert
@@ -68,6 +69,15 @@ def create_parser():
 
   -----------------------------------------------------------------
 
+  cactus serve [model]                 OpenAI-compatible local HTTP server
+                                       serves prepared v2 bundles only
+
+    Optional flags:
+    --host <addr>                      bind address (default: 127.0.0.1)
+    --port <port>                      port (default: 8080)
+
+  -----------------------------------------------------------------
+
   cactus download <model>              fetch pre-converted CQ from Cactus-Compute
 
     Optional flags:
@@ -199,6 +209,15 @@ def create_parser():
     transcribe_parser.add_argument("--reconvert", action="store_true",
                                    help="Download original model and convert (instead of using pre-converted from Cactus-Compute)")
 
+    # ── serve ─────────────────────────────────────────────────────────
+    serve_parser = subparsers.add_parser("serve", help="Start OpenAI-compatible HTTP server")
+    serve_parser.add_argument("model", nargs="?", default=None,
+                              help="Prepared v2 bundle path, local model dir name, or HF model ID")
+    serve_parser.add_argument("--host", default="127.0.0.1",
+                              help="Bind address (default: 127.0.0.1)")
+    serve_parser.add_argument("--port", type=int, default=8080,
+                              help="Port (default: 8080)")
+
     # ── test ──────────────────────────────────────────────────────────
     test_parser = subparsers.add_parser("test", help="Run the test suite")
     test_parser.add_argument("--model", dest="model_id", default=DEFAULT_TEST_MODEL_ID,
@@ -270,6 +289,7 @@ def create_parser():
     "download":   cmd_download,
     "build":      cmd_build,
     "run":        cmd_run,
+    "serve":      cmd_serve,
     "transcribe": cmd_transcribe,
     "test":       cmd_test,
 

diff --git a/python/cactus/cli/serve.py b/python/cactus/cli/serve.py
@@ -0,0 +1,71 @@
+from pathlib import Path
+
+from .common import BLUE, GREEN, RED, PROJECT_ROOT, is_repo_checkout, print_color
+from .download import get_weights_dir
+
+
+def _weights_root() -> Path:
+    """Return the directory under which prepared model weights are looked up.
+
+    When ``is_repo_checkout()`` is true this is ``PROJECT_ROOT / "weights"``;
+    otherwise it is ``~/.cache/cactus/weights`` in the user's home directory.
+    """
+    if not is_repo_checkout():
+        user_cache = Path.home() / ".cache" / "cactus"
+        return user_cache / "weights"
+    return PROJECT_ROOT / "weights"
+
+
+def _resolve_model_arg(model: str | None) -> tuple[Path | None, str | None]:
+    """Resolve the CLI ``model`` argument to a directory and a model name.
+
+    Candidates are tried in order and the first existing directory wins:
+    the argument taken as a filesystem path (with ``~`` expanded), a
+    directory of that name under ``_weights_root()``, and finally
+    ``get_weights_dir(model)``.
+
+    Returns:
+        ``(None, None)`` when ``model`` is empty or ``None``;
+        ``(directory, directory.name)`` for the first candidate that exists;
+        ``(None, model)`` when no candidate is an existing directory.
+    """
+    if not model:
+        return None, None
+    try:
+        literal = Path(model).expanduser()
+    except RuntimeError:
+        # ``expanduser`` raises when the home directory cannot be resolved
+        # (e.g. ``~unknown-user/...``). Keep the path as typed so the lookup
+        # ends in the ordinary "not found" result instead of a traceback.
+        literal = Path(model)
+    # Candidates are produced lazily so later lookups are skipped as soon as
+    # an earlier one matches.
+    lookups = (
+        lambda: literal,
+        lambda: _weights_root() / model,
+        lambda: get_weights_dir(model),
+    )
+    for lookup in lookups:
+        directory = lookup()
+        if directory.is_dir():
+            return directory, directory.name
+    return None, model
+
+
+def _is_valid_bundle(path: Path) -> bool:
+    """Tell whether ``path`` holds the files expected of a v2 bundle.
+
+    Both ``config.txt`` and ``components/manifest.json`` must exist.
+    """
+    required = (
+        path / "config.txt",
+        path / "components" / "manifest.json",
+    )
+    return all(entry.exists() for entry in required)
+
+
+def cmd_serve(args):
+    """Start the OpenAI-compatible HTTP server.
+
+    Args:
+        args: Parsed CLI namespace; ``model``, ``host`` and ``port`` are read.
+
+    Returns:
+        ``0`` once ``uvicorn.run`` returns, or ``1`` when the port is out of
+        range, the model argument does not resolve to a valid bundle, the
+        server dependencies cannot be imported, or ``create_app`` raises
+        ``RuntimeError``.
+    """
+    # argparse only guarantees an int; reject values the socket layer cannot
+    # bind so the user gets a message rather than a traceback.
+    if not 0 <= args.port <= 65535:
+        print_color(RED, f"Error: invalid port: {args.port} (expected 0-65535)")
+        return 1
+
+    model_path, model_name = _resolve_model_arg(args.model)
+    if args.model and model_path is None:
+        print_color(RED, f"Error: model not found: {args.model}")
+        print("Prepare a v2 bundle first with `cactus run <model>` or `cactus convert <model>`.")
+        return 1
+    if model_path is not None and not _is_valid_bundle(model_path):
+        print_color(RED, f"Error: not a valid v2 Cactus bundle: {model_path}")
+        print("Expected config.txt and components/manifest.json.")
+        return 1
+
+    # The server stack is imported only here, so a missing package is
+    # reported with install instructions instead of an import traceback.
+    try:
+        import uvicorn
+    except ImportError:
+        print_color(RED, "Error: uvicorn not installed. Install the serve extra or run `pip install fastapi uvicorn python-multipart`.")
+        return 1
+
+    try:
+        from ..server import create_app
+    except ImportError:
+        print_color(RED, "Error: server dependencies not installed. Install the serve extra or run `pip install fastapi uvicorn python-multipart`.")
+        return 1
+
+    try:
+        application = create_app(
+            weights_root=_weights_root(),
+            model_path=model_path,
+            default_model=model_name,
+        )
+    except RuntimeError as exc:
+        print_color(RED, f"Error: {exc}")
+        print("Prepare a v2 bundle first with `cactus run <model>` or `cactus convert <model>`.")
+        return 1
+
+    # List what the app registered before handing control to uvicorn.
+    models = sorted(application.state.registry.models)
+    print_color(GREEN, f"Available models: {', '.join(models)}")
+    print_color(BLUE, f"Starting server on {args.host}:{args.port}")
+    uvicorn.run(application, host=args.host, port=args.port, log_level="info")
+    return 0