Skip to content

Commit 245adaf

Browse files
chore(scripts): NVIDIA NIM smoke and repo-agent check
Extract shared HTTP helpers to nvidia_nim_common.py; add smoke scripts for GLM-5.1 and Qwen3-Coder plus a repo-aware prompt check. Keys stay in .env only. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 3dfc238 commit 245adaf

6 files changed

Lines changed: 428 additions & 0 deletions

File tree

.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copy to .env (gitignored). Get a key from https://build.nvidia.com/
2+
NVIDIA_API_KEY=nvapi-your-key-here
3+
NVIDIA_API_BASE=https://integrate.api.nvidia.com/v1
4+
# Lead: plan / review / verify (https://build.nvidia.com/z-ai/glm-5.1)
5+
NVIDIA_GLM_MODEL=z-ai/glm-5.1
6+
# Implement: scoped code changes (https://build.nvidia.com/qwen/qwen3-coder-480b-a35b-instruct)
7+
NVIDIA_QWEN_MODEL=qwen/qwen3-coder-480b-a35b-instruct

scripts/nvidia_glm_smoke.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python3
2+
"""Smoke-test NVIDIA NIM chat API for z-ai/glm-5.1 (lead agent).
3+
4+
Requires NVIDIA_API_KEY. Docs: https://docs.api.nvidia.com/nim/reference/z-ai-glm5.1
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import argparse
10+
import os
11+
import sys
12+
from pathlib import Path
13+
14+
REPO_ROOT = Path(__file__).resolve().parents[1]
15+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
16+
17+
from nvidia_nim_common import ( # noqa: E402
18+
chat_completion,
19+
chat_completion_stream,
20+
load_dotenv,
21+
)
22+
23+
DEFAULT_MODEL = os.environ.get("NVIDIA_GLM_MODEL", "z-ai/glm-5.1")
24+
25+
26+
def main() -> int:
    """Run the GLM-5.1 smoke test and return a process exit code.

    Loads ``.env`` from the repository root, parses command-line options,
    then sends one chat request (streaming unless ``--no-stream`` is given).

    Returns:
        0 when the API produced a response, 1 on a missing/placeholder key,
        an empty stream, or any request failure.
    """
    import urllib.error  # local import: this script's header does not import it

    # Load .env *before* building the parser so NVIDIA_GLM_MODEL defined only
    # in the file can act as the --model default (DEFAULT_MODEL was resolved
    # at import time, before the file was read).
    load_dotenv(REPO_ROOT / ".env")

    p = argparse.ArgumentParser(description="NVIDIA NIM GLM-5.1 smoke test.")
    p.add_argument(
        "prompt",
        nargs="?",
        default="Reply with exactly: NVIDIA GLM-5.1 smoke OK",
    )
    p.add_argument("--model", default=os.environ.get("NVIDIA_GLM_MODEL", DEFAULT_MODEL))
    p.add_argument("--temperature", type=float, default=1.0)
    p.add_argument("--top-p", type=float, default=0.95)
    p.add_argument("--max-tokens", type=int, default=64)
    p.add_argument("--timeout", type=int, default=300)
    p.add_argument("--no-stream", action="store_true")
    args = p.parse_args()

    api_key = os.environ.get("NVIDIA_API_KEY", "").strip()
    if not api_key:
        print("Missing NVIDIA_API_KEY.", file=sys.stderr)
        return 1
    # Same guard as the Qwen smoke script: the .env.example placeholder can
    # never authenticate, so fail fast with an actionable message.
    if api_key.startswith("nvapi-your-key"):
        print("Replace placeholder key in .env with a real nvapi- key.", file=sys.stderr)
        return 1

    common = {
        "api_key": api_key,
        "prompt": args.prompt,
        "model": args.model,
        "temperature": args.temperature,
        "top_p": args.top_p,
        "max_tokens": args.max_tokens,
        "timeout": args.timeout,
    }
    try:
        if args.no_stream:
            data = chat_completion(**common)
            print(data["choices"][0]["message"])
        else:
            print("Streaming:", file=sys.stderr)
            text = chat_completion_stream(**common)
            if not text.strip():
                print("No content in stream.", file=sys.stderr)
                return 1
    except urllib.error.HTTPError as exc:
        # str(HTTPError) omits the response body, which is where the API
        # explains the failure; surface it explicitly.
        body = exc.read().decode("utf-8", errors="replace")
        print(f"HTTP {exc.code}: {body}", file=sys.stderr)
        return 1
    except TimeoutError:
        print(f"Timed out after {args.timeout}s. Retry or use --timeout 600.", file=sys.stderr)
        return 1
    except Exception as exc:
        # Top-level boundary of a smoke script: report anything else and
        # exit non-zero instead of dumping a traceback.
        print(exc, file=sys.stderr)
        return 1
    return 0
69+
70+
71+
if __name__ == "__main__":
72+
raise SystemExit(main())

scripts/nvidia_nim_common.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Shared stdlib helpers for NVIDIA NIM chat/completions smoke scripts."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import os
7+
import urllib.request
8+
from pathlib import Path
9+
10+
NVIDIA_BASE = "https://integrate.api.nvidia.com/v1"
11+
12+
13+
def load_dotenv(path: Path) -> None:
    """Load ``KEY=VALUE`` lines from *path* into ``os.environ``.

    Keys already present in the environment are never overwritten. Blank
    lines, ``#`` comment lines, and lines without ``=`` are ignored. A
    leading ``export `` is accepted, and one pair of matching surrounding
    quotes is removed from the value.

    Args:
        path: Location of the dotenv file; a missing file is a no-op.
    """
    if not path.is_file():
        return
    # utf-8-sig decodes plain UTF-8 identically but also drops a leading BOM,
    # which would otherwise become part of the first key.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, sep, value = line.partition("=")
        if not sep:
            continue
        key, value = key.strip(), value.strip()
        # Remove exactly one pair of *matching* quotes. Chained
        # .strip('"').strip("'") would also eat unbalanced or nested quotes
        # that belong to the value itself.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        if key and key not in os.environ:
            os.environ[key] = value
25+
26+
27+
def _post_chat(*, api_key: str, payload: dict, timeout: int) -> urllib.request.addinfourl:
    """POST *payload* as JSON to the chat/completions endpoint.

    The base URL is taken from the ``NVIDIA_API_BASE`` environment variable
    (listed in ``.env.example``) when it is set and non-empty, otherwise from
    the module constant ``NVIDIA_BASE``.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        payload: JSON-serialisable request body.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The open response object; the caller is responsible for closing it
        (for example by using it as a context manager).
    """
    # rstrip("/") so a configured base with a trailing slash does not yield
    # a double slash in the final URL.
    base = (os.environ.get("NVIDIA_API_BASE") or NVIDIA_BASE).rstrip("/")
    req = urllib.request.Request(
        f"{base}/chat/completions",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    return urllib.request.urlopen(req, timeout=timeout)
38+
39+
40+
def chat_completion(
    *,
    api_key: str,
    prompt: str,
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    timeout: int,
) -> dict:
    """Send one non-streaming chat request and return the decoded JSON body.

    The prompt is sent as a single ``user`` message; sampling options are
    forwarded unchanged in the request body.
    """
    user_message = {"role": "user", "content": prompt}
    request_body = dict(
        model=model,
        messages=[user_message],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stream=False,
    )
    response = _post_chat(api_key=api_key, payload=request_body, timeout=timeout)
    # Read the whole body while the connection is open, then parse it.
    with response:
        raw_body = response.read()
    return json.loads(raw_body.decode("utf-8"))
60+
61+
62+
def chat_completion_stream(
    *,
    api_key: str,
    prompt: str,
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    timeout: int,
) -> str:
    """Stream SSE chunks, echo them to stdout, and return the full text.

    Each ``data:`` line of the response is parsed as JSON and the
    ``delta.content`` of its first choice is printed as it arrives. Reading
    stops at the ``[DONE]`` sentinel or when the response ends.

    Returns:
        The concatenation of all content fragments ("" if none arrived).
    """
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True,
    }
    parts: list[str] = []
    with _post_chat(api_key=api_key, payload=payload, timeout=timeout) as resp:
        for raw in resp:
            line = raw.decode("utf-8", errors="replace").strip()
            # Only "data:" lines carry payloads; skip blank separators,
            # comments and other SSE fields.
            if not line.startswith("data:"):
                continue
            data = line.removeprefix("data:").strip()
            if data == "[DONE]":
                break
            if not data:
                # An empty data field is not JSON; nothing to parse.
                continue
            chunk = json.loads(data)
            choices = chunk["choices"]
            if not choices:
                # A chunk may legitimately carry an empty choices list
                # (e.g. a trailing usage-only chunk); indexing [0] would
                # raise IndexError and abort an otherwise good stream.
                continue
            delta = choices[0].get("delta") or {}
            text = delta.get("content") or ""
            if text:
                parts.append(text)
                print(text, end="", flush=True)
    if parts:
        # Terminate the echoed line once streaming is finished.
        print()
    return "".join(parts)

scripts/nvidia_qwen_diag.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Quick NVIDIA API connectivity diag (no secrets printed)."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import os
7+
import time
8+
import urllib.error
9+
import urllib.request
10+
from pathlib import Path
11+
12+
REPO_ROOT = Path(__file__).resolve().parents[1]
13+
14+
15+
def load_key() -> str:
    """Return the NVIDIA API key from ``.env`` or the environment.

    The repository-root ``.env`` is consulted first; the first non-empty
    ``NVIDIA_API_KEY=`` line wins, with surrounding whitespace and quote
    characters removed. If the file is absent or holds no usable value, the
    ``NVIDIA_API_KEY`` environment variable is returned ("" when unset).
    """
    env_file = REPO_ROOT / ".env"
    # Guard the read: without it a missing .env raises FileNotFoundError and
    # the environment fallback below is never reached.
    if env_file.is_file():
        for line in env_file.read_text(encoding="utf-8").splitlines():
            if line.strip().startswith("NVIDIA_API_KEY="):
                value = line.split("=", 1)[1].strip().strip('"').strip("'")
                if value:
                    return value
    return os.environ.get("NVIDIA_API_KEY", "")
20+
21+
22+
def probe(key: str, label: str, timeout: int) -> None:
    """Send one minimal chat request and print a one-line outcome.

    Args:
        key: Bearer token to authenticate with (never printed).
        label: Prefix identifying this probe in the output line.
        timeout: Timeout in seconds passed to ``urlopen``.

    Prints ``OK``, ``HTTP <code>`` with the start of the error body,
    ``TIMEOUT``, or the name of the ``OSError`` subclass that was raised.
    """
    import socket  # local import: only needed to recognise timeout errors

    payload = json.dumps(
        {
            "model": "qwen/qwen3-coder-480b-a35b-instruct",
            "messages": [{"role": "user", "content": "hi"}],
            "max_tokens": 1,
            "stream": False,
        }
    ).encode()
    req = urllib.request.Request(
        "https://integrate.api.nvidia.com/v1/chat/completions",
        data=payload,
        headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
        method="POST",
    )
    # monotonic() cannot jump when the system clock is adjusted, so it is the
    # right clock for measuring elapsed time.
    t0 = time.monotonic()
    timeout_errors = (TimeoutError, socket.timeout)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            resp.read()
            print(f"{label}: OK in {time.monotonic() - t0:.1f}s")
    except urllib.error.HTTPError as exc:
        # Read at most 500 bytes, show at most 200 characters of the body.
        body = exc.read(500).decode("utf-8", errors="replace")
        print(f"{label}: HTTP {exc.code} in {time.monotonic() - t0:.1f}s — {body[:200]}")
    except timeout_errors:
        print(f"{label}: TIMEOUT after {timeout}s")
    except OSError as exc:
        # urlopen wraps connect-phase failures in URLError; a timeout then
        # sits in .reason and should still be reported as a timeout.
        if isinstance(getattr(exc, "reason", None), timeout_errors):
            print(f"{label}: TIMEOUT after {timeout}s")
        else:
            print(f"{label}: {type(exc).__name__} in {time.monotonic() - t0:.1f}s — {exc}")
49+
50+
51+
def main() -> None:
    """Probe the API with an invalid key, then with the configured key.

    The invalid-key probe shows how the endpoint answers a rejected request;
    the real-key probe runs only when a key was found. Only the key's length
    and whether it has the ``nvapi-`` prefix are printed, never any of its
    characters.
    """
    real = load_key()
    if real:
        # Do not echo a slice of the key: the first 12 characters would
        # include secret material beyond the fixed "nvapi-" scheme prefix.
        print(f"real_key_len={len(real)} has_nvapi_prefix={real.startswith('nvapi-')}")
    else:
        print("real_key_missing")
    probe("INVALID", "invalid_key", 30)
    if real:
        probe(real, "real_key", 60)
57+
58+
59+
if __name__ == "__main__":
60+
main()

scripts/nvidia_qwen_smoke.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#!/usr/bin/env python3
2+
"""Smoke-test NVIDIA NIM chat API for qwen/qwen3-coder-480b-a35b-instruct.
3+
4+
Requires NVIDIA_API_KEY in the environment (never commit the key).
5+
Docs: https://docs.api.nvidia.com/nim/reference/qwen-qwen3-coder-480b-a35b-instruct-infer
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import argparse
11+
import json
12+
import os
13+
import sys
14+
import urllib.error
15+
from pathlib import Path
16+
17+
REPO_ROOT = Path(__file__).resolve().parents[1]
18+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
19+
20+
from nvidia_nim_common import ( # noqa: E402
21+
chat_completion,
22+
chat_completion_stream,
23+
load_dotenv,
24+
)
25+
26+
DEFAULT_MODEL = "qwen/qwen3-coder-480b-a35b-instruct"
27+
28+
29+
def main() -> int:
    """Run the Qwen3 Coder smoke test and return a process exit code.

    Loads ``.env`` from the repository root, parses command-line options,
    then sends one chat request (streaming unless ``--no-stream`` is given).

    Returns:
        0 when the API produced a response, 1 on a missing/placeholder key,
        an empty stream, an HTTP error, a network error, or a timeout.
    """
    # Load .env before building the parser so NVIDIA_QWEN_MODEL (listed in
    # .env.example) can act as the --model default, as the GLM script does
    # with NVIDIA_GLM_MODEL.
    load_dotenv(REPO_ROOT / ".env")

    p = argparse.ArgumentParser(description="NVIDIA NIM Qwen3 Coder smoke test.")
    p.add_argument(
        "prompt",
        nargs="?",
        default="Reply with exactly: NVIDIA NIM smoke OK",
        help="User message content (default is a tiny connectivity check).",
    )
    p.add_argument("--model", default=os.environ.get("NVIDIA_QWEN_MODEL", DEFAULT_MODEL))
    p.add_argument("--temperature", type=float, default=0.7)
    p.add_argument("--top-p", type=float, default=0.8)
    p.add_argument("--max-tokens", type=int, default=32)
    p.add_argument("--timeout", type=int, default=300, help="HTTP timeout seconds.")
    p.add_argument(
        "--no-stream",
        action="store_true",
        help="Wait for full JSON response (slower for large models).",
    )
    args = p.parse_args()

    api_key = os.environ.get("NVIDIA_API_KEY", "").strip()
    if not api_key:
        print(
            "Missing NVIDIA_API_KEY. Copy .env.example to .env or set the variable.",
            file=sys.stderr,
        )
        return 1
    if api_key.startswith("nvapi-your-key"):
        print("Replace placeholder key in .env with a real nvapi- key.", file=sys.stderr)
        return 1

    common = {
        "api_key": api_key,
        "prompt": args.prompt,
        "model": args.model,
        "temperature": args.temperature,
        "top_p": args.top_p,
        "max_tokens": args.max_tokens,
        "timeout": args.timeout,
    }
    try:
        if args.no_stream:
            data = chat_completion(**common)
            message = data["choices"][0]["message"]
            print(json.dumps(message, indent=2, ensure_ascii=False))
        else:
            print("Streaming (first token = API OK):", file=sys.stderr)
            text = chat_completion_stream(**common)
            if not text.strip():
                print("No content in stream.", file=sys.stderr)
                return 1
    except urllib.error.HTTPError as exc:
        # Must precede URLError: HTTPError is a subclass of it.
        body = exc.read().decode("utf-8", errors="replace")
        print(f"HTTP {exc.code}: {body}", file=sys.stderr)
        if exc.code == 403:
            print("Check NVIDIA_API_KEY at build.nvidia.com (rotate if exposed).", file=sys.stderr)
        return 1
    except urllib.error.URLError as exc:
        # DNS failures, refused connections and connect-phase timeouts
        # arrive wrapped in URLError; report them instead of a traceback.
        print(f"Network error: {exc.reason}", file=sys.stderr)
        return 1
    except TimeoutError:
        msg = f"Timed out after {args.timeout}s. Retry or use --timeout 600."
        print(msg, file=sys.stderr)
        return 1

    return 0
93+
94+
95+
if __name__ == "__main__":
96+
raise SystemExit(main())

0 commit comments

Comments
 (0)