chore(scripts): NVIDIA NIM smoke and repo-agent check

JohnCCarter · cursoragent · JohnCCarter · commit 245adafbffde · 2026-06-05T09:02:29.000+02:00
Extract shared HTTP helpers to nvidia_nim_common.py; add smoke scripts for
GLM-5.1 and Qwen3-Coder plus a repo-aware prompt check. Keys stay in .env only.

Co-authored-by: Cursor <cursoragent@cursor.com>
diff --git a/.env.example b/.env.example
@@ -0,0 +1,7 @@
+# Copy to .env (gitignored). Get a key from https://build.nvidia.com/
+NVIDIA_API_KEY=nvapi-your-key-here
+NVIDIA_API_BASE=https://integrate.api.nvidia.com/v1
+# Lead: plan / review / verify (https://build.nvidia.com/z-ai/glm-5.1)
+NVIDIA_GLM_MODEL=z-ai/glm-5.1
+# Implement: scoped code changes (https://build.nvidia.com/qwen/qwen3-coder-480b-a35b-instruct)
+NVIDIA_QWEN_MODEL=qwen/qwen3-coder-480b-a35b-instruct
diff --git a/scripts/nvidia_glm_smoke.py b/scripts/nvidia_glm_smoke.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""Smoke-test NVIDIA NIM chat API for z-ai/glm-5.1 (lead agent).
+
+Requires NVIDIA_API_KEY. Docs: https://docs.api.nvidia.com/nim/reference/z-ai-glm5.1
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]  # this file lives in scripts/, one level below the root
+sys.path.insert(0, str(REPO_ROOT / "scripts"))  # lets the sibling nvidia_nim_common import resolve
+
+from nvidia_nim_common import (  # noqa: E402
+    chat_completion,
+    chat_completion_stream,
+    load_dotenv,
+)
+
+DEFAULT_MODEL = os.environ.get("NVIDIA_GLM_MODEL", "z-ai/glm-5.1")  # evaluated at import time
+
+
+def main() -> int:
+    """Send one prompt to the GLM model; return 0 on success, 1 on any failure."""
+    # Load .env before building the parser: DEFAULT_MODEL was resolved at import
+    # time, so a NVIDIA_GLM_MODEL that exists only in .env would otherwise be ignored.
+    load_dotenv(REPO_ROOT / ".env")
+    p = argparse.ArgumentParser(description="NVIDIA NIM GLM-5.1 smoke test.")
+    p.add_argument("prompt", nargs="?", default="Reply with exactly: NVIDIA GLM-5.1 smoke OK")
+    p.add_argument("--model", default=os.environ.get("NVIDIA_GLM_MODEL") or DEFAULT_MODEL)
+    p.add_argument("--temperature", type=float, default=1.0)
+    p.add_argument("--top-p", type=float, default=0.95)
+    p.add_argument("--max-tokens", type=int, default=64)
+    p.add_argument("--timeout", type=int, default=300)
+    p.add_argument("--no-stream", action="store_true")
+    args = p.parse_args()
+
+    api_key = os.environ.get("NVIDIA_API_KEY", "").strip()
+    if not api_key:
+        print("Missing NVIDIA_API_KEY.", file=sys.stderr)
+        return 1
+
+    common = {
+        "api_key": api_key,
+        "prompt": args.prompt,
+        "model": args.model,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+        "max_tokens": args.max_tokens,
+        "timeout": args.timeout,
+    }
+    try:
+        if args.no_stream:
+            data = chat_completion(**common)
+            print(data["choices"][0]["message"])
+        else:
+            print("Streaming:", file=sys.stderr)
+            text = chat_completion_stream(**common)
+            if not text.strip():
+                print("No content in stream.", file=sys.stderr)
+                return 1
+    except Exception as exc:
+        print(exc, file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/nvidia_nim_common.py b/scripts/nvidia_nim_common.py
@@ -0,0 +1,98 @@
+"""Shared stdlib helpers for NVIDIA NIM chat/completions smoke scripts."""
+
+from __future__ import annotations
+
+import json
+import os
+import urllib.request
+from pathlib import Path
+
+NVIDIA_BASE = "https://integrate.api.nvidia.com/v1"  # hosted API root; request paths are appended
+
+
+def load_dotenv(path: Path) -> None:
+    """Copy KEY=VALUE pairs from *path* into os.environ; already-set variables win."""
+    if path.is_file():
+        for raw in path.read_text(encoding="utf-8").splitlines():
+            entry = raw.strip()
+            # Skip blanks, ``#`` comments, and anything that is not an assignment.
+            if entry and not entry.startswith("#") and "=" in entry:
+                name, _, rest = entry.partition("=")
+                name = name.strip()
+                rest = rest.strip().strip('"').strip("'")  # drop surrounding quotes
+                if name and name not in os.environ:
+                    os.environ[name] = rest
+
+
+def _post_chat(*, api_key: str, payload: dict, timeout: int) -> urllib.request.addinfourl:
+    """POST *payload* as JSON to ``<base>/chat/completions``; return the open response.
+
+    ``<base>`` is ``NVIDIA_API_BASE`` from the environment (the variable documented in
+    ``.env.example``) when set and non-empty, else ``NVIDIA_BASE``. Caller closes it.
+    """
+    base = (os.environ.get("NVIDIA_API_BASE") or NVIDIA_BASE).rstrip("/")
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    body = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(f"{base}/chat/completions", data=body, headers=headers, method="POST")
+    return urllib.request.urlopen(req, timeout=timeout)
+
+
+def chat_completion(
+    *,
+    api_key: str,
+    prompt: str,
+    model: str,
+    temperature: float,
+    top_p: float,
+    max_tokens: int,
+    timeout: int,
+) -> dict:
+    """Send one non-streaming chat request and return the decoded JSON body.
+
+    The prompt is sent as a single ``user`` message.
+    """
+    messages = [{"role": "user", "content": prompt}]
+    sampling = {"temperature": temperature, "top_p": top_p, "max_tokens": max_tokens}
+    request_body = {"model": model, "messages": messages, **sampling, "stream": False}
+    with _post_chat(api_key=api_key, payload=request_body, timeout=timeout) as response:
+        raw = response.read()
+    return json.loads(raw.decode("utf-8"))
+
+
+def chat_completion_stream(
+    *,
+    api_key: str,
+    prompt: str,
+    model: str,
+    temperature: float,
+    top_p: float,
+    max_tokens: int,
+    timeout: int,
+) -> str:
+    """Stream a chat completion over SSE, echo it to stdout, and return the full text.
+
+    The ``content`` of each chunk's first ``delta`` is printed as it arrives. Chunks
+    whose ``choices`` list is missing or empty are skipped rather than raising
+    ``KeyError``/``IndexError``. Reading stops at the ``[DONE]`` sentinel.
+    """
+    sampling = {"temperature": temperature, "top_p": top_p, "max_tokens": max_tokens}
+    messages = [{"role": "user", "content": prompt}]
+    payload = {"model": model, "messages": messages, **sampling, "stream": True}
+    parts: list[str] = []
+    with _post_chat(api_key=api_key, payload=payload, timeout=timeout) as resp:
+        for raw in resp:
+            line = raw.decode("utf-8", errors="replace").strip()
+            if not line.startswith("data:"):
+                continue  # blank separators, SSE comments, and non-data fields
+            data = line.removeprefix("data:").strip()
+            if data == "[DONE]":
+                break
+            choices = json.loads(data).get("choices") or []
+            delta = (choices[0].get("delta") or {}) if choices else {}
+            text = delta.get("content") or ""
+            if text:
+                parts.append(text)
+                print(text, end="", flush=True)
+    if parts:
+        print()
+    return "".join(parts)
diff --git a/scripts/nvidia_qwen_diag.py b/scripts/nvidia_qwen_diag.py
@@ -0,0 +1,78 @@
+"""Quick NVIDIA API connectivity diag (no secrets printed)."""
+
+from __future__ import annotations
+
+import json
+import os
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+
+def load_key() -> str:
+    """Return the NVIDIA API key, or "" when none is configured.
+
+    A ``NVIDIA_API_KEY=`` line in the repo ``.env`` takes precedence; when the
+    file is absent or has no such line, the ``NVIDIA_API_KEY`` environment
+    variable is used.
+    """
+    env_file = REPO_ROOT / ".env"
+    # .env is optional (it is gitignored); reading it unconditionally would raise
+    # FileNotFoundError and make the environment fallback below unreachable.
+    if env_file.is_file():
+        for line in env_file.read_text(encoding="utf-8").splitlines():
+            if line.strip().startswith("NVIDIA_API_KEY="):
+                return line.split("=", 1)[1].strip().strip('"').strip("'")
+    return os.environ.get("NVIDIA_API_KEY", "")
+
+
+def probe(key: str, label: str, timeout: int) -> None:
+    """POST a one-token chat request with *key* and print the outcome under *label*.
+
+    Prints one line: OK with elapsed seconds, the HTTP status plus up to 200
+    characters of the error body, a timeout notice, or the OSError raised.
+    """
+    payload = json.dumps(
+        {
+            "model": "qwen/qwen3-coder-480b-a35b-instruct",
+            "messages": [{"role": "user", "content": "hi"}],
+            "max_tokens": 1,
+            "stream": False,
+        }
+    ).encode()
+    req = urllib.request.Request(
+        "https://integrate.api.nvidia.com/v1/chat/completions",
+        data=payload,
+        headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
+        method="POST",
+    )
+    t0 = time.monotonic()  # monotonic clock: elapsed time is immune to wall-clock jumps
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            resp.read()
+        print(f"{label}: OK in {time.monotonic() - t0:.1f}s")
+    except urllib.error.HTTPError as exc:
+        body = exc.read(500).decode("utf-8", errors="replace")
+        print(f"{label}: HTTP {exc.code} in {time.monotonic() - t0:.1f}s — {body[:200]}")
+    except TimeoutError:
+        print(f"{label}: TIMEOUT after {timeout}s")
+    except OSError as exc:
+        print(f"{label}: {type(exc).__name__} in {time.monotonic() - t0:.1f}s — {exc}")
+
+
+def main() -> None:
+    """Probe the API with a deliberately invalid key, then with the real key if any."""
+    real = load_key()
+    # Report only the length: even a key prefix is secret material, and this
+    # module promises that no secrets are printed.
+    print(f"real_key_len={len(real)}" if real else "real_key_missing")
+    probe("INVALID", "invalid_key", 30)
+    if real:
+        probe(real, "real_key", 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/nvidia_qwen_smoke.py b/scripts/nvidia_qwen_smoke.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""Smoke-test NVIDIA NIM chat API for qwen/qwen3-coder-480b-a35b-instruct.
+
+Requires NVIDIA_API_KEY in the environment (never commit the key).
+Docs: https://docs.api.nvidia.com/nim/reference/qwen-qwen3-coder-480b-a35b-instruct-infer
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import urllib.error
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]  # this file lives in scripts/, one level below the root
+sys.path.insert(0, str(REPO_ROOT / "scripts"))  # lets the sibling nvidia_nim_common import resolve
+
+from nvidia_nim_common import (  # noqa: E402
+    chat_completion,
+    chat_completion_stream,
+    load_dotenv,
+)
+
+DEFAULT_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"  # fallback for the --model option
+
+
+def main() -> int:
+    """Send one prompt to the Qwen3 Coder model and return a process exit code.
+
+    Returns 0 on success; 1 when the key is missing or still the placeholder, the
+    request fails (HTTP error, network error, timeout), or the stream has no text.
+    """
+    # Load .env first so NVIDIA_QWEN_MODEL (documented in .env.example) can supply
+    # the --model default; real environment variables still take precedence.
+    load_dotenv(REPO_ROOT / ".env")
+    p = argparse.ArgumentParser(description="NVIDIA NIM Qwen3 Coder smoke test.")
+    p.add_argument(
+        "prompt",
+        nargs="?",
+        default="Reply with exactly: NVIDIA NIM smoke OK",
+        help="User message content (default is a tiny connectivity check).",
+    )
+    p.add_argument("--model", default=os.environ.get("NVIDIA_QWEN_MODEL") or DEFAULT_MODEL)
+    p.add_argument("--temperature", type=float, default=0.7)
+    p.add_argument("--top-p", type=float, default=0.8)
+    p.add_argument("--max-tokens", type=int, default=32)
+    p.add_argument("--timeout", type=int, default=300, help="HTTP timeout seconds.")
+    p.add_argument(
+        "--no-stream",
+        action="store_true",
+        help="Wait for full JSON response (slower for large models).",
+    )
+    args = p.parse_args()
+
+    api_key = os.environ.get("NVIDIA_API_KEY", "").strip()
+    if not api_key:
+        print("Missing NVIDIA_API_KEY. Copy .env.example to .env or set the variable.", file=sys.stderr)
+        return 1
+    if api_key.startswith("nvapi-your-key"):
+        print("Replace placeholder key in .env with a real nvapi- key.", file=sys.stderr)
+        return 1
+
+    common = dict(api_key=api_key, prompt=args.prompt, model=args.model, timeout=args.timeout)
+    common.update(temperature=args.temperature, top_p=args.top_p, max_tokens=args.max_tokens)
+    try:
+        if args.no_stream:
+            data = chat_completion(**common)
+            message = data["choices"][0]["message"]
+            print(json.dumps(message, indent=2, ensure_ascii=False))
+        else:
+            print("Streaming (first token = API OK):", file=sys.stderr)
+            text = chat_completion_stream(**common)
+            if not text.strip():
+                print("No content in stream.", file=sys.stderr)
+                return 1
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode("utf-8", errors="replace")
+        print(f"HTTP {exc.code}: {body}", file=sys.stderr)
+        if exc.code == 403:
+            print("Check NVIDIA_API_KEY at build.nvidia.com (rotate if exposed).", file=sys.stderr)
+        return 1
+    except TimeoutError:
+        print(f"Timed out after {args.timeout}s. Retry or use --timeout 600.", file=sys.stderr)
+        return 1
+    except urllib.error.URLError as exc:
+        # DNS failure, refused connection, TLS error, or a connect-phase timeout.
+        print(f"Network error: {exc.reason}", file=sys.stderr)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/qwen_repo_agent_check.py b/scripts/qwen_repo_agent_check.py