Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 186 additions & 11 deletions scripts/generate_code_snippet_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,191 @@
Reads .snippet.*.py and .snippet.*.ts files from src/code-samples-generated/
and creates corresponding MDX files in src/snippets/code-samples/ for use in docs.

When a snippet uses a LangChain-style model argument (`model="…"` in Python or
`model: "…"` in TypeScript), the generated MDX can be wrapped in <CodeGroup> with the same
seven provider/model options as /oss/deepagents/quickstart (Google, OpenAI, Anthropic,
OpenRouter, Fireworks, Baseten, Ollama). Both `provider:model-id` and bare model names
(for example `claude-sonnet-4-5-20250929`) are recognized.

Snippets are left as a single fenced block when no model argument is found, or when all
model arguments are marked to keep.

To keep a specific model line:

- In Python, put `# KEEP MODEL` on the line immediately before the `model="..."` line.
- In TypeScript, put `// KEEP MODEL` on the line immediately before the `model: "..."` line.

The marker line is stripped during processing and that model occurrence is not
replaced/expanded.

Run as part of `make code-snippets` after Bluehawk extraction.
"""

from __future__ import annotations

import re
from pathlib import Path

# Python: keyword argument model="…" (init_chat_model / create_deep_agent / etc.).
# Captures the quoted model id; the leading \b avoids matching e.g. `submodel=`.
DEEPAGENTS_PY_MODEL_KWARG_RE = re.compile(r'\bmodel\s*=\s*"([^"]+)"')

# TypeScript: object property model: "…" (ChatAnthropic, createDeepAgent, …).
# Same capture-group contract as the Python pattern above.
DEEPAGENTS_TS_MODEL_KWARG_RE = re.compile(r'\bmodel\s*:\s*"([^"]+)"')

# Tab title and full `model=` / `model:` token for each variant (matches
# src/oss/deepagents/quickstart.mdx Python tabs; JS uses google-genai spelling).
# Each entry becomes one tab in the generated <CodeGroup>; the token string is
# substituted verbatim over the canonical model span found in the snippet.
DEEPAGENTS_QUICKSTART_PY_MODEL_TABS: list[tuple[str, str]] = [
    ("Google", 'model="google_genai:gemini-3.1-pro-preview"'),
    ("OpenAI", 'model="openai:gpt-5.4"'),
    ("Anthropic", 'model="anthropic:claude-sonnet-4-6"'),
    ("OpenRouter", 'model="openrouter:anthropic/claude-sonnet-4-6"'),
    ("Fireworks", 'model="fireworks:accounts/fireworks/models/qwen3p5-397b-a17b"'),
    ("Baseten", 'model="baseten:zai-org/GLM-5"'),
    ("Ollama", 'model="ollama:devstral-2"'),
]

# TypeScript counterpart: same providers/models, `model: "…"` property syntax.
DEEPAGENTS_QUICKSTART_TS_MODEL_TABS: list[tuple[str, str]] = [
    ("Google", 'model: "google-genai:gemini-3.1-pro-preview"'),
    ("OpenAI", 'model: "openai:gpt-5.4"'),
    ("Anthropic", 'model: "anthropic:claude-sonnet-4-6"'),
    ("OpenRouter", 'model: "openrouter:anthropic/claude-sonnet-4-6"'),
    ("Fireworks", 'model: "fireworks:accounts/fireworks/models/qwen3p5-397b-a17b"'),
    ("Baseten", 'model: "baseten:zai-org/GLM-5"'),
    ("Ollama", 'model: "ollama:devstral-2"'),
]


def _model_id_from_py_tab_token(tab_token: str) -> str:
m = re.match(r'model="([^"]+)"', tab_token)
if not m:
msg = f"expected model= tab token, got {tab_token!r}"
raise ValueError(msg)
return m.group(1)


def _model_id_from_ts_tab_token(tab_token: str) -> str:
m = re.match(r'model:\s*"([^"]+)"', tab_token)
if not m:
msg = f"expected model: tab token, got {tab_token!r}"
raise ValueError(msg)
return m.group(1)


# Model ids that should never be expanded into a CodeGroup, per language.
# Both are currently empty; presumably kept as extension points — confirm
# against callers before removing.
DEEPAGENTS_PY_SKIP_EXPAND_MODEL_IDS: frozenset[str] = frozenset()
DEEPAGENTS_TS_SKIP_EXPAND_MODEL_IDS: frozenset[str] = frozenset()


def _id_after_first_colon(tab_id: str) -> str:
"""For openai:gpt-5.4 return gpt-5.4; for bare ids return as-is."""
if ":" not in tab_id:
return tab_id
return tab_id.split(":", 1)[1]


# Marker comments authors place on the line directly above a model argument to
# opt that occurrence out of CodeGroup expansion (stripped from output).
KEEP_MODEL_MARKER_PY = "# KEEP MODEL"
KEEP_MODEL_MARKER_TS = "// KEEP MODEL"


def _codegroup_fence(tab_title: str, fence_lang: str, code: str) -> str:
"""One fenced code block inside a <CodeGroup> (indent matches docs conventions)."""
body = "\n".join(" " + line for line in code.splitlines())
return "\n".join(
[
f" ```{fence_lang} {tab_title}",
body,
" ```",
]
)


def _replace_span(text: str, start: int, end: int, replacement: str) -> str:
return text[:start] + replacement + text[end:]


def _expand_to_deepagents_codegroup(
    content: str,
    *,
    canonical_span: tuple[int, int],
    tab_definitions: list[tuple[str, str]],
    fence_lang: str,
) -> str:
    """Wrap `content` in a CodeGroup, one tab per quickstart model variant.

    For each (title, model_token) pair, the canonical model span in *content*
    is replaced with that variant's token and rendered as its own fence.
    """
    span_start, span_end = canonical_span
    fences: list[str] = []
    for tab_title, model_token in tab_definitions:
        variant = _replace_span(content, span_start, span_end, model_token)
        fences.append(_codegroup_fence(tab_title, fence_lang, variant))
    joined = "\n\n".join(fences)
    return f"<CodeGroup>\n{joined}\n</CodeGroup>\n"


def maybe_expand_deepagents_quickstart_codegroup(
    content: str,
    *,
    language: str,
    fence_lang: str,
) -> tuple[str | None, str]:
    """Return (expanded_mdx_or_none, content_with_keep_markers_stripped).

    Scans *content* for a model argument (Python ``model="…"`` or TypeScript
    ``model: "…"``). The first occurrence not preceded by a KEEP MODEL marker
    becomes the canonical span that is rewritten once per quickstart provider
    tab. Marker lines themselves are always removed from the returned text.

    Args:
        content: Raw snippet text.
        language: "python" or "ts"; any other value returns (None, content).
        fence_lang: Language tag for the generated code fences.

    Returns:
        ``(expanded, stripped)`` — ``expanded`` is the <CodeGroup> MDX or
        ``None`` when no expandable model argument was found; ``stripped`` is
        *content* with marker lines removed.
    """
    if language == "python":
        model_re = DEEPAGENTS_PY_MODEL_KWARG_RE
        tab_definitions = DEEPAGENTS_QUICKSTART_PY_MODEL_TABS
        keep_marker = KEEP_MODEL_MARKER_PY
    elif language == "ts":
        model_re = DEEPAGENTS_TS_MODEL_KWARG_RE
        tab_definitions = DEEPAGENTS_QUICKSTART_TS_MODEL_TABS
        keep_marker = KEEP_MODEL_MARKER_TS
    else:
        return None, content

    # Strip marker lines while recording which model occurrence to expand.
    out_lines: list[str] = []
    # Running length of "".join(out_lines); maintained incrementally instead of
    # re-summing every iteration (the old sum() made this pass O(n^2) in lines).
    out_offset = 0
    keep_next_model = False
    canonical_span: tuple[int, int] | None = None

    for line in content.splitlines(keepends=True):
        if line.strip() == keep_marker:
            keep_next_model = True
            continue

        m = model_re.search(line)
        if m is not None:
            if keep_next_model:
                # This occurrence was explicitly marked; consume the flag.
                keep_next_model = False
            elif canonical_span is None:
                # First unmarked occurrence wins; offsets are into `stripped`.
                canonical_span = (out_offset + m.start(), out_offset + m.end())

        out_lines.append(line)
        out_offset += len(line)

    stripped = "".join(out_lines)
    if canonical_span is None:
        return None, stripped

    return (
        _expand_to_deepagents_codegroup(
            stripped,
            canonical_span=canonical_span,
            tab_definitions=tab_definitions,
            fence_lang=fence_lang,
        ),
        stripped,
    )


def format_snippet_mdx(content: str, *, language: str, fence_lang: str) -> str:
    """Return final MDX body for a snippet file.

    Snippets with an expandable model argument become a <CodeGroup>; all
    others are emitted as one plain fenced code block.
    """
    normalized = content.rstrip() + "\n"
    expanded, stripped = maybe_expand_deepagents_quickstart_codegroup(
        normalized, language=language, fence_lang=fence_lang
    )
    if expanded is None:
        return f"```{fence_lang}\n{stripped.rstrip()}\n```\n"
    return expanded


def main() -> None:
repo_root = Path(__file__).resolve().parent.parent
Expand All @@ -19,29 +199,24 @@ def main() -> None:

snippets_dir.mkdir(parents=True, exist_ok=True)

# Mapping: (glob_pattern, language) for each snippet type
snippet_configs = [
("*.snippet.*.py", "python"),
("*.snippet.*.ts", "ts"),
("*.snippet.*.py", "python", "python"),
("*.snippet.*.ts", "ts", "ts"),
]

# Only process snippets that already have language suffix to
# avoid Bluehawk duplicates
lang_suffix = {"python": "-py", "ts": "-js"}

for glob_pattern, language in snippet_configs:
for glob_pattern, language, fence_lang in snippet_configs:
for snippet_file in generated_dir.glob(glob_pattern):
snippet_name = ".".join(snippet_file.stem.split(".")[2:])
expected_suffix = lang_suffix[language]
# Only process language-specific snippets
# (tool-return-object-py, not tool-return-object) to avoid
# duplicates when Bluehawk emits both suffixed and unsuffixed versions
if not snippet_name.endswith(expected_suffix):
continue

content = snippet_file.read_text(encoding="utf-8")
# Create MDX with fenced code block
mdx_content = f"```{language}\n{content.rstrip()}\n```\n"
mdx_content = format_snippet_mdx(
content, language=language, fence_lang=fence_lang
)
mdx_path = snippets_dir / f"{snippet_name}.mdx"
mdx_path.write_text(mdx_content, encoding="utf-8")
print(f"Generated {mdx_path.relative_to(repo_root)}")
Expand Down
2 changes: 2 additions & 0 deletions src/code-samples/deepagents/content-builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def generate_cover(prompt: str, slug: str) -> str:

client = genai.Client()
response = client.models.generate_content(
# KEEP MODEL
model="gemini-2.5-flash-image",
contents=[prompt],
)
Expand Down Expand Up @@ -84,6 +85,7 @@ def generate_social_image(prompt: str, platform: str, slug: str) -> str:

client = genai.Client()
response = client.models.generate_content(
# KEEP MODEL
model="gemini-2.5-flash-image",
contents=[prompt],
)
Expand Down
7 changes: 4 additions & 3 deletions src/code-samples/deepagents/content-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const generateCover = tool(
const { GoogleGenerativeAI } = await import("@google/generative-ai");
const genai = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY ?? "");
const model = genai.getGenerativeModel({
// KEEP MODEL
model: "gemini-2.5-flash-image",
});
const result = await model.generateContent(prompt);
Expand Down Expand Up @@ -78,6 +79,7 @@ const generateSocialImage = tool(
const { GoogleGenerativeAI } = await import("@google/generative-ai");
const genai = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY ?? "");
const model = genai.getGenerativeModel({
// KEEP MODEL
model: "gemini-2.5-flash-image",
});
const result = await model.generateContent(prompt);
Expand All @@ -100,9 +102,7 @@ const generateSocialImage = tool(
prompt: z
.string()
.describe("Detailed description of the image to generate."),
platform: z
.string()
.describe('Either "linkedin" or "tweets"'),
platform: z.string().describe('Either "linkedin" or "tweets"'),
slug: z
.string()
.describe("Post slug. Image saves to <platform>/<slug>/image.png"),
Expand All @@ -125,6 +125,7 @@ function createContentWriter() {
};

return createDeepAgent({
model: "anthropic:claude-sonnet-4-6",
memory: ["./AGENTS.md"],
skills: ["./skills/"],
tools: [generateCover, generateSocialImage],
Expand Down
6 changes: 4 additions & 2 deletions src/code-samples/langchain/nostream-tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from langchain_anthropic import ChatAnthropic
from langgraph.graph import START, StateGraph

stream_model = ChatAnthropic(model_name="claude-3-haiku-20240307")
internal_model = ChatAnthropic(model_name="claude-3-haiku-20240307").with_config(
# KEEP MODEL
stream_model = ChatAnthropic(model_name="claude-haiku-4-5-20251001")
# KEEP MODEL
internal_model = ChatAnthropic(model_name="claude-haiku-4-5-20251001").with_config(
{"tags": ["nostream"]}
)

Expand Down
11 changes: 8 additions & 3 deletions src/code-samples/langchain/nostream-tag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import { ChatAnthropic } from "@langchain/anthropic";
import { StateGraph, StateSchema, START } from "@langchain/langgraph";
import * as z from "zod";

const streamModel = new ChatAnthropic({ model: "claude-3-haiku-20240307" });
// KEEP MODEL
const streamModel = new ChatAnthropic({ model: "claude-haiku-4-5-20251001" });
const internalModel = new ChatAnthropic({
model: "claude-3-haiku-20240307",
// KEEP MODEL
model: "claude-haiku-4-5-20251001",
}).withConfig({
tags: ["nostream"],
});
Expand Down Expand Up @@ -42,7 +44,10 @@ const graph = new StateGraph(State)
.addEdge("writeAnswer", "internal_notes")
.compile();

const stream = await graph.stream({ topic: "AI" }, { streamMode: "messages" });
const stream = await graph.stream(
{ topic: "AI", answer: "", notes: "" },
{ streamMode: "messages" },
);
// :snippet-end:

// :remove-start:
Expand Down
1 change: 1 addition & 0 deletions src/code-samples/langchain/streaming-reasoning-tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def get_weather(city: str) -> str:


model = ChatAnthropic(
# KEEP MODEL
model_name="claude-sonnet-4-6",
timeout=None,
stop=None,
Expand Down
1 change: 1 addition & 0 deletions src/code-samples/langchain/streaming-reasoning-tokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const getWeather = tool(

const agent = createAgent({
model: new ChatAnthropic({
// KEEP MODEL
model: "claude-sonnet-4-6",
thinking: { type: "enabled", budget_tokens: 5000 },
}),
Expand Down
Loading
Loading