diff --git a/.github/skillspector-allow.yml b/.github/skillspector-allow.yml index 96123ab..6131795 100644 --- a/.github/skillspector-allow.yml +++ b/.github/skillspector-allow.yml @@ -101,6 +101,18 @@ suppressions: argparse defaults / explicit --image-model/--tts-model/--stt-model flags, not from LLM or model output. Nothing here consumes unvalidated model output, so there is no injection sink to sanitize. + - skill: local-ai-use + rule: TM2 + file: SKILL.md + match: Chaining Abuse + reason: >- + False positive. Line 103 is the documented Ubuntu/Debian install + one-liner `sudo add-apt-repository -y ppa:lemonade-team/stable && + sudo apt-get update && sudo apt-get install -y lemonade-server + lemonade-desktop`. The `&&` chaining is the standard apt install + sequence (add PPA, refresh index, install package), not tool/command + chaining of untrusted or model-derived steps. No LLM output feeds the + chain and each command is a fixed, reviewable install step. - skill: local-ai-use rule: P2 file: templates/local-ai-rule.md diff --git a/eval/behavioral/tests/test_local_ai_use.py b/eval/behavioral/tests/test_local_ai_use.py index 308af11..21ec051 100644 --- a/eval/behavioral/tests/test_local_ai_use.py +++ b/eval/behavioral/tests/test_local_ai_use.py @@ -29,13 +29,10 @@ def test_generate_image_of_a_cat(): run.workspace_contains("out.png") # Positive behavioral expectations + run.should("Install Lemonade Server if it is not already installed") run.should("Download the SD-Turbo model if the model is not already downloaded") run.should("Add a 'Local AI Use' block to AGENTS.md") # Negative behavioral expectations - run.should_not("Use the GenerateImage tool") - run.should_not("Use a cloud image API") + run.should_not("Pull unrelated modalities for this image generation task") run.should_not("Reach for a cloud image path instead of local Lemonade") - - # Skipped behavioral expectations - #run.should_not("Pull unrelated modalities for an image-only task") diff --git 
a/skills/local-ai-use/SKILL.md b/skills/local-ai-use/SKILL.md index f5c5d54..d2896cb 100644 --- a/skills/local-ai-use/SKILL.md +++ b/skills/local-ai-use/SKILL.md @@ -21,18 +21,28 @@ needs image generation, text-to-speech, or speech-to-text uses the local agent's own LLM keeps handling text; only the expensive multimodal calls move on-device. -The skill does two things: - -1. **Verifies that local Lemonade is reachable and has the right models.** -2. **Drops a `Local AI Use` block into the workspace `AGENTS.md`** so the agent +The skill does three things: + +1. **Makes sure local Lemonade is installed and running.** If the `lemonade` + CLI is missing, the setup script installs the **full version** of Lemonade + (server + desktop app) on the user's behalf; if the server is installed but + not running, it launches it. +2. **Verifies that local Lemonade is reachable.** +3. **Drops a `Local AI Use` block into the workspace `AGENTS.md`** so the agent reads the routing rule on every later turn, in Cursor, Claude Code, Codex, Gemini CLI, and any other agent that respects `AGENTS.md`. +Models are **not** downloaded during setup. Each default model is pulled +lazily, on first use, by the routing rule (e.g. the first image request pulls +the image model). This keeps setup fast and avoids gigabytes of downloads the +user may never need. + ## When to use this skill Use this skill when **all** of the following are true: -- The user has, or is willing to install, the system-wide Lemonade Server. +- The user wants local Lemonade. If it is not yet installed, the setup script + installs the **full version** (server + desktop app) for them automatically. - The user accepts the default Lemonade endpoint `http://localhost:13305`. - The user wants the change to be **persistent** across future turns and agent restarts (the rule is written to disk). @@ -44,14 +54,15 @@ instead. ## Prerequisites - **OS:** Windows 11 x64, Ubuntu/Debian x64, or macOS (beta). 
-- **Lemonade Server CLI on `PATH`:** verify with `lemonade --version`. If - missing, install from <https://lemonade-server.ai/install_options.html> - before continuing. Do not silently install on the user's machine; that is a - system-wide change and must be the user's call. +- **Lemonade Server:** the setup script installs it if missing. It downloads + and silently installs the **full version** (Windows `lemonade.msi`, the + Ubuntu/Debian `ppa:lemonade-team/stable` PPA plus `lemonade-desktop`, or the + macOS `.pkg`), then launches the server. On Linux/macOS this needs `sudo`. + Pass `--no-install` if the user wants to install it themselves instead. - **Disk:** ~8 GB free for the three default models (SD-Turbo + Whisper-Tiny - + kokoro-v1). -- **Network:** required for the first `lemonade pull` of each model. After - that, every modality runs offline. + + kokoro-v1), plus ~0.1 GB for the installer itself. +- **Network:** required for the install download and the first `lemonade pull` + of each model. After that, every modality runs offline. ## The opinionated path @@ -59,52 +70,67 @@ Run this checklist top to bottom. Track progress against it; do not move on until each step verifies. ``` -[ ] 1. Confirm Lemonade Server is installed and reachable -[ ] 2. Pull the three default modality models -[ ] 3. Install the routing rule into the workspace AGENTS.md -[ ] 4. Smoke-test image, TTS, and STT against the local endpoint +[ ] 1. Ensure Lemonade Server is installed and running (auto-install if missing) +[ ] 2. Install the routing rule into the workspace AGENTS.md ``` -The single command that does steps 1, 2, and 3 in one shot is: +The single command that does both steps in one shot is: ```bash python scripts/setup_local_ai.py ``` -The script is idempotent: re-running it on a -fully configured workspace is a no-op apart from a healthcheck. Read the -sections below for what to do when each step fails. 
+It auto-installs the full version of Lemonade if the `lemonade` CLI is +missing, launches the server if it is not running, then writes the rule. The +script is idempotent: re-running it on a fully configured workspace is a no-op +apart from a healthcheck. Read the sections below for what to do when each +step fails. --- -## Step 1: confirm Lemonade Server is reachable +## Step 1: ensure Lemonade Server is installed and running -Run: +`scripts/setup_local_ai.py` handles this end to end, but here is what it does +so you can do it by hand or debug it: -```bash -lemonade status --json -``` +**1a. Is the CLI installed?** Check whether `lemonade` is on `PATH` +(`lemonade --version`). If it is not, install the **full version** on the +user's behalf: -Two acceptable outcomes: +| OS | Install the full version | +|---|---| +| Windows | Download `lemonade.msi` from the [latest release](https://github.com/lemonade-sdk/lemonade/releases/latest/download/lemonade.msi) and run `msiexec /i lemonade.msi /qn` (silent, per-user, no elevation). | +| Ubuntu/Debian | `sudo add-apt-repository -y ppa:lemonade-team/stable && sudo apt-get update && sudo apt-get install -y lemonade-server lemonade-desktop` | +| macOS (beta) | Download the `Lemonade-<version>-Darwin.pkg` from the latest release and run `sudo installer -pkg Lemonade-<version>-Darwin.pkg -target /`. | + +The full installer bundles the server **and** the desktop app; the +server-only minimal MSI and the legacy `lemonade-server` CLI are deprecated +upstream. After a Windows install the CLI lands in +`%LOCALAPPDATA%\lemonade_server` and is added to the *user* PATH (new shells +only); the setup script probes that directory so it works in the same run. + +**1b. Is the server running?** Check `lemonade status --json`. | `lemonade status` says | Action | |---|---| | `Server is running on port 13305` | Continue to Step 2. | -| `Server is not running` | Start it. On Windows, launch the **Lemonade** Start Menu shortcut. 
On Linux, run `sudo systemctl start lemonade-server`. Re-check `lemonade status`. | +| `Server is not running` | Launch it with `lemonade serve` (the script does this in the background and polls `/api/v1/health` until it answers). | -If `lemonade` is not on `PATH` at all, the server is not installed. Stop and -point the user at <https://lemonade-server.ai/install_options.html>. Do not -attempt a silent install. +Only if the automatic install genuinely fails (no `apt-get`, no `sudo`, +download blocked) should you stop and point the user at +<https://lemonade-server.ai/install_options.html>. The rest of this skill assumes the endpoint is `http://localhost:13305/api/v1` and no API key is required (the system-wide server defaults to no auth on loopback). If the user has set `LEMONADE_API_KEY`, the routing rule template in `templates/local-ai-rule.md` shows where to add the `Authorization` header. -## Step 2: pull the three default modality models +### Default modality models (pulled on first use, not during setup) -Pull these three. They are the **Lite Collection** defaults from Lemonade -OmniRouter, sized to keep token-and-cost savings real on commodity hardware: +Setup does **not** download these. The installed rule pulls each one the first +time that modality is requested. They are the **Lite Collection** defaults from +Lemonade OmniRouter, sized to keep token-and-cost savings real on commodity +hardware: | Modality | Model | Size | Why this default | |---|---|---|---| @@ -112,34 +138,20 @@ OmniRouter, sized to keep token-and-cost savings real on commodity hardware: | Text-to-speech | `kokoro-v1` | ~0.3 GB | Only TTS model Lemonade currently supports; CPU-only, low latency | | Speech-to-text | `Whisper-Tiny` | ~0.1 GB | Smallest Whisper; fast on CPU. Upgrade to `Whisper-Large-v3-Turbo` if accuracy matters more than latency. | -```bash -lemonade pull SD-Turbo -lemonade pull kokoro-v1 -lemonade pull Whisper-Tiny -``` - -To choose a different model while installing the rule, pass it to the setup -script. 
For example, to make future image requests use SDXL: +To write a different model ID into the rule, pass it to the setup script. For +example, to make future image requests use SDXL: ```bash python scripts/setup_local_ai.py --image-model SDXL-Turbo ``` -The script will pull the selected model and write that model ID into the -installed `AGENTS.md` rule. The same pattern works for `--tts-model` and -`--stt-model`. - -Each `pull` is idempotent. To verify what is already downloaded: - -```bash -lemonade list --downloaded -``` - -For coverage of larger / higher-quality alternatives (`SDXL-Turbo`, -`Flux-2-Klein-4B`, `Whisper-Large-v3-Turbo`), see the +That model ID is written into the installed `AGENTS.md` rule and pulled on its +first use. The same pattern works for `--tts-model` and `--stt-model`. For +larger / higher-quality alternatives (`SDXL-Turbo`, `Flux-2-Klein-4B`, +`Whisper-Large-v3-Turbo`), see the [model picker in reference.md](reference.md#model-picker). -## Step 3: install the routing rule into AGENTS.md +## Step 2: install the routing rule into AGENTS.md The rule is a Markdown block stored in [`templates/local-ai-rule.md`](templates/local-ai-rule.md). Append it to the workspace's `AGENTS.md` (create the file if missing). Both @@ -169,44 +181,6 @@ block to: The rule's content is identical; only the file location changes. -## Step 4: smoke-test the three modalities - -Verify each modality against the live server before declaring success. These -mirror the inline patterns in the installed rule, so a green pass here means -the rule will work. If you installed with a model override such as -`--image-model SDXL-Turbo`, use that model ID in the smoke test and confirm -the installed `AGENTS.md` rule contains it. 
- -**Image generation** (writes `out.png`): - -```bash -curl -sX POST http://localhost:13305/api/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{"model":"SD-Turbo","prompt":"a single red apple on a white table","size":"512x512","steps":4,"response_format":"b64_json"}' \ - | python -c "import sys,json,base64; open('out.png','wb').write(base64.b64decode(json.load(sys.stdin)['data'][0]['b64_json']))" -``` - -**Text-to-speech** (writes `out.mp3`): - -```bash -curl -sX POST http://localhost:13305/api/v1/audio/speech \ - -H "Content-Type: application/json" \ - -d '{"model":"kokoro-v1","input":"Local AI is now active.","response_format":"mp3"}' \ - -o out.mp3 -``` - -**Speech-to-text** (round-trips `out.mp3` → text via a wav re-encode): - -```bash -ffmpeg -y -i out.mp3 -ar 16000 -ac 1 out.wav -curl -sX POST http://localhost:13305/api/v1/audio/transcriptions \ - -F "file=@out.wav" -F "model=Whisper-Tiny" -``` - -If any of the three returns a non-2xx status, fix it now. The rule we just -installed sends future requests to these same endpoints, so a broken endpoint -becomes a broken user experience. - --- ## What changes after this skill runs @@ -236,8 +210,8 @@ machine. | Symptom | Cause | Recovery | |---|---|---| -| `lemonade: command not found` | Server CLI not installed | Install from <https://lemonade-server.ai/install_options.html>; restart shell. | -| `Server is not running` | Service stopped after install | Windows: launch the **Lemonade** Start Menu shortcut. Linux: `sudo systemctl start lemonade-server`. | +| `lemonade: command not found` | CLI not installed | Re-run `python scripts/setup_local_ai.py` (auto-installs the full version). If it just installed on Windows, open a new shell so the user PATH refreshes, or the script will find it under `%LOCALAPPDATA%\lemonade_server`. | +| `Server is not running` | Service stopped after install | Run `lemonade serve` (the setup script launches it for you). 
| | `POST /v1/images/generations` returns 404 model not found | Image model not downloaded | `lemonade pull SD-Turbo` and retry. | | Image generation is slow on CPU (~4–5 min) | sd-cpp on CPU backend | Install the GPU backend on supported AMD hardware: `lemonade backends install sd-cpp:rocm`. | | `POST /v1/audio/transcriptions` returns 400 unsupported format | Input is not 16 kHz mono WAV | Re-encode with `ffmpeg -i in.* -ar 16000 -ac 1 out.wav`. | @@ -249,14 +223,11 @@ machine. Mark this skill complete only when **all** of the following are true: - [ ] `lemonade status --json` reports the server running on port 13305. -- [ ] `lemonade list --downloaded` shows `SD-Turbo`, `kokoro-v1`, and - `Whisper-Tiny`. - [ ] The workspace `AGENTS.md` contains the `amd-skills:local-ai-use` block. -- [ ] All three smoke tests in Step 4 succeed. - [ ] On a follow-up turn, asking the agent to "generate an image of X" causes it to POST to `http://localhost:13305/api/v1/images/generations` - rather than calling a cloud tool. + (pulling the model on first use) rather than calling a cloud tool. If any box is unchecked, the user is still paying cloud cost for at least one modality. diff --git a/skills/local-ai-use/scripts/setup_local_ai.py b/skills/local-ai-use/scripts/setup_local_ai.py index 2ea62c9..a845fd0 100644 --- a/skills/local-ai-use/scripts/setup_local_ai.py +++ b/skills/local-ai-use/scripts/setup_local_ai.py @@ -5,19 +5,25 @@ # /// """One-shot setup for the `local-ai-use` skill. -Performs the three setup steps from SKILL.md: - - 1. Confirms the system-wide Lemonade Server is installed and reachable on - http://localhost:13305 (override with --host / --port or LEMONADE_HOST / - LEMONADE_PORT). - 2. Pulls the three default modality models if they are missing - (image: SD-Turbo, TTS: kokoro-v1, STT: Whisper-Tiny). - 3. Writes the routing rule from `templates/local-ai-rule.md` into +Performs the setup steps from SKILL.md: + + 1. 
Ensures the full Lemonade Server (server + desktop app) is installed and + running on http://localhost:13305 (override with --host / --port or + LEMONADE_HOST / LEMONADE_PORT). If the `lemonade` CLI is missing, the + full version is installed on the user's behalf; if the server is not + running, it is launched. + 2. Writes the routing rule from `templates/local-ai-rule.md` into /AGENTS.md, between stable BEGIN/END markers so re-runs replace the block in place rather than appending. +Setup never downloads models: the default image/TTS/STT models are pulled +on first use, by the installed AGENTS.md rule (see its failure +handling). This keeps setup fast and offline-friendly. + The script is idempotent: a second run on a fully configured workspace only re-runs the healthcheck. It exits non-zero on any unrecoverable failure. +Pass --no-install to refuse the automatic install (it then just reports the +missing CLI and exits non-zero, the old behaviour). Constants are documented inline; nothing is magical. """ @@ -27,9 +33,12 @@ import argparse import json import os +import platform import re import shutil import subprocess +import tempfile +import time import urllib.error import urllib.request from pathlib import Path @@ -59,6 +68,39 @@ INSTALL_URL = "https://lemonade-server.ai/install_options.html" +# The *full* Windows installer: Lemonade Server plus the desktop app (the +# minimal, server-only MSI and the legacy `lemonade-server` CLI are deprecated +# upstream). `releases/latest/download/` always resolves to the newest +# published asset of that exact name, so we never have to pin a version. +WINDOWS_MSI_URL = ( + "https://github.com/lemonade-sdk/lemonade/releases/latest/download/lemonade.msi" +) +# Default per-user install location used by lemonade.msi. The CLI is added to +# the *user* PATH in the registry, which the current process will not see, so +# we also probe this tree directly after installing. 
+WINDOWS_INSTALL_DIR = Path( + os.environ.get("LOCALAPPDATA", str(Path.home() / "AppData" / "Local")) +) / "lemonade_server" + +# GitHub release metadata, used to resolve the versioned macOS .pkg asset +# (its filename embeds the version, so there is no stable latest/download URL). +GITHUB_LATEST_RELEASE_API = ( + "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest" +) + +# Ubuntu/Debian "full" install: the stable PPA (server) plus the desktop +# frontend package. Run as a single shell pipeline so one sudo prompt covers +# the whole thing. +LINUX_APT_INSTALL = ( + "sudo add-apt-repository -y ppa:lemonade-team/stable && " + "sudo apt-get update && " + "sudo apt-get install -y lemonade-server lemonade-desktop" +) + +# CLI names to look for / drive, newest first. `lemonade-server` is the +# deprecated alias kept for older installs. +CLI_NAMES = ("lemonade", "lemonade-server") + def _default_workspace() -> Path: """Workspace root for AGENTS.md. @@ -86,77 +128,160 @@ def _http_get(url: str, timeout_s: float) -> tuple[int, bytes]: return r.status, r.read() -def check_cli_installed() -> bool: - """Return True if the `lemonade` CLI is on PATH.""" - return shutil.which("lemonade") is not None +def find_cli() -> str | None: + """Return a runnable Lemonade CLI, or None. + Checks PATH for `lemonade` (then the deprecated `lemonade-server` alias). + On Windows the MSI updates the *user* PATH in the registry, which the + current process will not have inherited, so we also probe the default + per-user install tree for the executables. + """ + for name in CLI_NAMES: + found = shutil.which(name) + if found: + return found + if platform.system() == "Windows" and WINDOWS_INSTALL_DIR.exists(): + for name in CLI_NAMES: + for exe in WINDOWS_INSTALL_DIR.rglob(f"{name}.exe"): + return str(exe) + return None + + +def install_lemonade() -> None: + """Install the full version of Lemonade for the current OS. 
+ + Raises RuntimeError on any unrecoverable failure so the caller can report + a clean message and fall back to the manual install link. + """ + system = platform.system() + if system == "Windows": + _install_windows() + elif system == "Linux": + _install_linux() + elif system == "Darwin": + _install_macos() + else: + raise RuntimeError( + f"No automatic installer for this OS ({system}). " + f"Install manually: {INSTALL_URL}" + ) -def check_server_reachable(host: str, port: int) -> bool: - """Return True if /api/v1/health responds 200 within 3 seconds.""" - url = f"http://{host}:{port}/api/v1/health" + +def _download(url: str, dest: Path) -> None: + _print(f"downloading {url}") try: - status, _ = _http_get(url, timeout_s=3.0) - return status == 200 - except (urllib.error.URLError, OSError): - return False + urllib.request.urlretrieve(url, dest) # noqa: S310 + except (urllib.error.URLError, OSError) as exc: + raise RuntimeError(f"download failed ({url}): {exc}") from exc + + +def _run(cmd: list[str] | str, *, shell: bool = False) -> None: + """Run an install command, surfacing a clean error on failure.""" + printable = cmd if isinstance(cmd, str) else " ".join(cmd) + _print(f"running: {printable}") + result = subprocess.run(cmd, shell=shell) # noqa: S602,S603 + if result.returncode != 0: + raise RuntimeError(f"command failed (exit {result.returncode}): {printable}") + + +def _install_windows() -> None: + """Silently install the full lemonade.msi (server + desktop app).""" + msi = Path(tempfile.gettempdir()) / "lemonade.msi" + _download(WINDOWS_MSI_URL, msi) + # /qn = silent, per-user (no elevation needed). The MSI registers the CLI + # and Start Menu shortcut and pulls the full app payload. 
+ _run(["msiexec", "/i", str(msi), "/qn"]) + _print("Lemonade full version installed.") + + +def _install_linux() -> None: + """Install the stable PPA server plus the desktop frontend on apt distros.""" + if shutil.which("apt-get") is None: + raise RuntimeError( + "Automatic install only supports apt-based distros (Ubuntu/Debian). " + f"Install manually: {INSTALL_URL}" + ) + if os.geteuid() != 0 and shutil.which("sudo") is None: # type: ignore[attr-defined] + raise RuntimeError( + "Need root (or sudo) to install system packages. " + f"Install manually: {INSTALL_URL}" + ) + _run(LINUX_APT_INSTALL, shell=True) + _print("Lemonade full version installed.") -def list_downloaded_models(host: str, port: int) -> set[str]: - """Return the set of locally downloaded model IDs. +def _install_macos() -> None: + """Download the latest signed .pkg and install it system-wide.""" + pkg_url = _resolve_macos_pkg_url() + pkg = Path(tempfile.gettempdir()) / "Lemonade.pkg" + _download(pkg_url, pkg) + _run(["sudo", "installer", "-pkg", str(pkg), "-target", "/"]) + _print("Lemonade full version installed.") - Uses `lemonade list --downloaded` (CLI) and falls back to - GET /api/v1/models when the CLI lacks the flag. Returning an empty set is - treated as "could not determine" by the caller, which still attempts the - pulls; `lemonade pull` is itself idempotent. 
- """ - try: - out = subprocess.run( - ["lemonade", "list", "--downloaded", "--json"], - check=True, capture_output=True, text=True, timeout=10, - ).stdout - data = json.loads(out) - return {m.get("id", "") for m in data if isinstance(m, dict)} - except (subprocess.SubprocessError, json.JSONDecodeError, FileNotFoundError): - pass +def _resolve_macos_pkg_url() -> str: + """Resolve the versioned macOS .pkg download URL from the latest release.""" + req = urllib.request.Request( + GITHUB_LATEST_RELEASE_API, headers={"Accept": "application/vnd.github+json"} + ) try: - status, body = _http_get( - f"http://{host}:{port}/api/v1/models", - timeout_s=5, + with urllib.request.urlopen(req, timeout=15.0) as r: # noqa: S310 + data = json.loads(r.read()) + except (urllib.error.URLError, OSError, ValueError) as exc: + raise RuntimeError(f"could not query latest release: {exc}") from exc + for asset in data.get("assets", []): + name = asset.get("name", "") + if name.endswith("-Darwin.pkg"): + return asset["browser_download_url"] + raise RuntimeError( + "No macOS .pkg asset found in the latest release. " + f"Install manually: {INSTALL_URL}" + ) + + +def launch_server(cli: str, host: str, port: int) -> None: + """Start the Lemonade server in the background (it stays up after we exit).""" + cmd = [cli, "serve"] + # Only pass overrides; the server already defaults to localhost:13305. + if port != DEFAULT_PORT: + cmd += ["--port", str(port)] + if host not in {DEFAULT_HOST, "localhost", "::1"}: + cmd += ["--host", host] + _print(f"launching: {' '.join(cmd)}") + kwargs: dict = { + "stdout": subprocess.DEVNULL, + "stderr": subprocess.DEVNULL, + } + if platform.system() == "Windows": + # Detach so the persistent server survives this process exiting. 
+ kwargs["creationflags"] = ( + subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP ) - if status == 200: - data = json.loads(body) - return { - m.get("id", "") for m in data.get("data", []) - if isinstance(m, dict) and m.get("downloaded") - } - except (urllib.error.URLError, OSError, json.JSONDecodeError): - pass + else: + kwargs["start_new_session"] = True + try: + subprocess.Popen(cmd, **kwargs) # noqa: S603 + except OSError as exc: + raise RuntimeError(f"could not launch `{' '.join(cmd)}`: {exc}") from exc + - return set() +def wait_for_server(host: str, port: int, timeout_s: float = 90.0) -> bool: + """Poll /api/v1/health until it answers 200 or we hit the timeout.""" + deadline = time.monotonic() + timeout_s + while time.monotonic() < deadline: + if check_server_reachable(host, port): + return True + time.sleep(2.0) + return False -def pull_model(model: str) -> bool: - """Run `lemonade pull `. Returns True on success.""" - _print(f"pulling {model}...") +def check_server_reachable(host: str, port: int) -> bool: + """Return True if /api/v1/health responds 200 within 3 seconds.""" + url = f"http://{host}:{port}/api/v1/health" try: - subprocess.run( - ["lemonade", "pull", model], - check=True, - # Stream output so the user sees the download progress instead of - # staring at a frozen prompt; SD-Turbo is several GB. - stdout=None, stderr=None, - # SD-Turbo is the largest pull at ~5 GB. 30 minutes is generous - # for a slow connection; below that we'd false-positive on real - # downloads. 
- timeout=30 * 60, - ) - return True - except subprocess.CalledProcessError as exc: - _print(f"pull failed for {model} (exit {exc.returncode})") - return False - except subprocess.TimeoutExpired: - _print(f"pull timed out for {model} after 30 minutes") + status, _ = _http_get(url, timeout_s=3.0) + return status == 200 + except (urllib.error.URLError, OSError): return False @@ -276,63 +401,72 @@ def main(argv: list[str] | None = None) -> int: default=int(os.environ.get("LEMONADE_PORT", str(DEFAULT_PORT))), help="Lemonade Server port (default: 13305 / $LEMONADE_PORT).", ) - parser.add_argument( - "--skip-pull", - action="store_true", - help="Do not pull missing models; just verify and write AGENTS.md.", - ) parser.add_argument( "--image-model", default=DEFAULT_IMAGE_MODEL, - help=f"Image generation model to pull and write into AGENTS.md (default: {DEFAULT_IMAGE_MODEL}).", + help=f"Image generation model written into AGENTS.md, pulled on first use (default: {DEFAULT_IMAGE_MODEL}).", ) parser.add_argument( "--tts-model", default=DEFAULT_TTS_MODEL, - help=f"Text-to-speech model to pull and write into AGENTS.md (default: {DEFAULT_TTS_MODEL}).", + help=f"Text-to-speech model written into AGENTS.md, pulled on first use (default: {DEFAULT_TTS_MODEL}).", ) parser.add_argument( "--stt-model", default=DEFAULT_STT_MODEL, - help=f"Speech-to-text model to pull and write into AGENTS.md (default: {DEFAULT_STT_MODEL}).", + help=f"Speech-to-text model written into AGENTS.md, pulled on first use (default: {DEFAULT_STT_MODEL}).", + ) + parser.add_argument( + "--no-install", + action="store_true", + help="Do not auto-install/launch Lemonade; just report and exit non-zero if missing.", ) args = parser.parse_args(argv) - if not check_cli_installed(): - _print("FAIL: `lemonade` is not on PATH.") - _print(f"Install Lemonade Server first: {INSTALL_URL}") - return 2 + cli = find_cli() + if cli is None: + if args.no_install: + _print("FAIL: `lemonade` is not on PATH (--no-install set).") + 
_print(f"Install the full version manually: {INSTALL_URL}") + return 2 + _print("`lemonade` CLI not found; installing the full version of Lemonade.") + try: + install_lemonade() + except RuntimeError as exc: + _print(f"FAIL: automatic install did not complete: {exc}") + return 2 + cli = find_cli() + if cli is None: + _print("FAIL: install finished but the `lemonade` CLI is still not found.") + _print( + "Open a new shell so PATH refreshes and re-run, or install " + f"manually: {INSTALL_URL}" + ) + return 2 + _print(f"using Lemonade CLI: {cli}") if not check_server_reachable(args.host, args.port): - _print( - f"FAIL: Lemonade Server is not responding at " - f"http://{args.host}:{args.port}/api/v1/health." - ) - _print( - "Start it: on Windows launch the Lemonade Start Menu shortcut; " - "on Linux run `sudo systemctl start lemonade-server`." - ) - return 3 + if args.no_install: + _print( + f"FAIL: Lemonade Server is not responding at " + f"http://{args.host}:{args.port}/api/v1/health (--no-install set)." + ) + return 3 + _print("Lemonade Server is not running; launching it.") + try: + launch_server(cli, args.host, args.port) + except RuntimeError as exc: + _print(f"FAIL: could not launch the server: {exc}") + return 3 + if not wait_for_server(args.host, args.port): + _print( + f"FAIL: launched the server but it never became reachable at " + f"http://{args.host}:{args.port}/api/v1/health." + ) + return 3 _print(f"server reachable at http://{args.host}:{args.port}") - if not args.skip_pull: - downloaded = list_downloaded_models(args.host, args.port) - selected_models = dict.fromkeys( - (args.image_model, args.tts_model, args.stt_model) - ) - for model in selected_models: - if model in downloaded: - _print(f"already downloaded: {model}") - continue - if not pull_model(model): - # Surface the failure but keep going so the user at least gets - # the rule installed for the modalities that did succeed. 
- _print( - f"continuing without {model}; the rule will reference it " - "but calls will 404 until you pull it." - ) - upsert_agents_md( args.workspace.resolve(), host=args.host,