Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions python/cactus/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .download import cmd_download
from .compile import cmd_build
from .run import cmd_run
from .serve import cmd_serve
from .transcribe import cmd_transcribe
from .test import cmd_test
from .convert import cmd_convert
Expand Down Expand Up @@ -68,6 +69,15 @@ def create_parser():

-----------------------------------------------------------------

cactus serve [model] OpenAI-compatible local HTTP server
serves prepared v2 bundles only

Optional flags:
--host <addr> bind address (default: 127.0.0.1)
--port <port> port (default: 8080)

-----------------------------------------------------------------

cactus download <model> fetch pre-converted CQ from Cactus-Compute

Optional flags:
Expand Down Expand Up @@ -199,6 +209,15 @@ def create_parser():
transcribe_parser.add_argument("--reconvert", action="store_true",
help="Download original model and convert (instead of using pre-converted from Cactus-Compute)")

# ── serve ─────────────────────────────────────────────────────────
serve_parser = subparsers.add_parser("serve", help="Start OpenAI-compatible HTTP server")
serve_parser.add_argument("model", nargs="?", default=None,
help="Prepared v2 bundle path, local model dir name, or HF model ID")
serve_parser.add_argument("--host", default="127.0.0.1",
help="Bind address (default: 127.0.0.1)")
serve_parser.add_argument("--port", type=int, default=8080,
help="Port (default: 8080)")

# ── test ──────────────────────────────────────────────────────────
test_parser = subparsers.add_parser("test", help="Run the test suite")
test_parser.add_argument("--model", dest="model_id", default=DEFAULT_TEST_MODEL_ID,
Expand Down Expand Up @@ -270,6 +289,7 @@ def create_parser():
"download": cmd_download,
"build": cmd_build,
"run": cmd_run,
"serve": cmd_serve,
"transcribe": cmd_transcribe,
"test": cmd_test,

Expand Down
71 changes: 71 additions & 0 deletions python/cactus/cli/serve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from pathlib import Path

from .common import BLUE, GREEN, RED, PROJECT_ROOT, is_repo_checkout, print_color
from .download import get_weights_dir


def _weights_root() -> Path:
    """Return the directory under which model weights are looked up.

    When ``is_repo_checkout()`` is true this is ``PROJECT_ROOT / "weights"``;
    otherwise it is ``~/.cache/cactus/weights`` in the user's home directory.
    """
    in_checkout = is_repo_checkout()
    return (
        PROJECT_ROOT / "weights"
        if in_checkout
        else Path.home() / ".cache" / "cactus" / "weights"
    )


def _resolve_model_arg(model: str | None) -> tuple[Path | None, str | None]:
if not model:
return None, None
path = Path(model).expanduser()
if path.is_dir():
return path, path.name
candidate = _weights_root() / model
if candidate.is_dir():
return candidate, candidate.name
hf_candidate = get_weights_dir(model)
if hf_candidate.is_dir():
return hf_candidate, hf_candidate.name
return None, model


def _is_valid_bundle(path: Path) -> bool:
    """Return whether ``path`` contains the marker files of a v2 bundle.

    A bundle directory must contain both ``config.txt`` and
    ``components/manifest.json``.

    Args:
        path: Directory to inspect.

    Returns:
        ``True`` only if both markers exist as regular files.
    """
    # is_file() rather than exists(): a directory that happens to be named
    # like one of the markers must not make the bundle look valid.
    config_file = path / "config.txt"
    manifest_file = path / "components" / "manifest.json"
    return config_file.is_file() and manifest_file.is_file()


def cmd_serve(args):
    """Handle the ``serve`` subcommand: start the OpenAI-compatible HTTP server.

    Reads ``args.model``, ``args.host`` and ``args.port``. The optional model
    argument is resolved to a bundle directory and validated before
    ``uvicorn`` and the package's ``server`` module are imported, so that
    argument errors are reported before any dependency errors.

    Returns:
        ``1`` if the model cannot be found or is not a valid bundle, if
        ``uvicorn`` or the server module cannot be imported, or if
        ``create_app`` raises ``RuntimeError``; ``0`` once ``uvicorn.run``
        returns.
    """
    prepare_hint = "Prepare a v2 bundle first with `cactus run <model>` or `cactus convert <model>`."

    bundle_dir, bundle_name = _resolve_model_arg(args.model)
    if bundle_dir is None:
        # Only an error when the user actually asked for a specific model.
        if args.model:
            print_color(RED, f"Error: model not found: {args.model}")
            print(prepare_hint)
            return 1
    elif not _is_valid_bundle(bundle_dir):
        print_color(RED, f"Error: not a valid v2 Cactus bundle: {bundle_dir}")
        print("Expected config.txt and components/manifest.json.")
        return 1

    # The two imports are attempted separately so each failure gets its own
    # message.
    try:
        import uvicorn
    except ImportError:
        print_color(RED, "Error: uvicorn not installed. Install the serve extra or run `pip install fastapi uvicorn python-multipart`.")
        return 1

    try:
        from ..server import create_app
    except ImportError:
        print_color(RED, "Error: server dependencies not installed. Install the serve extra or run `pip install fastapi uvicorn python-multipart`.")
        return 1

    try:
        app = create_app(
            weights_root=_weights_root(),
            model_path=bundle_dir,
            default_model=bundle_name,
        )
    except RuntimeError as err:
        print_color(RED, f"Error: {err}")
        print(prepare_hint)
        return 1

    available = ", ".join(sorted(app.state.registry.models))
    print_color(GREEN, f"Available models: {available}")
    print_color(BLUE, f"Starting server on {args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")
    return 0
Loading
Loading