Skip to content

Commit 9d187c2

Browse files
authored
Merge pull request #20 from Haruhiko-Joe/pr/ablation-docs
Add ablation docs generator
2 parents cb7c65b + 78de4cf commit 9d187c2

2 files changed

Lines changed: 284 additions & 1 deletion

File tree

bench/README.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,32 @@ pnpm exec tsx bench/scripts/generate-qa.ts --project git --count 10 --providers
7070

7171
基于生成的 QA 对,让模型仅通过文档工具(doc-drill)回答问题,再用 judge 对比 gold answer 和采分点打分。
7272

73+
## 生成消融文档
74+
75+
为结构消融实验生成文档变体(完整、去边、扁平 MD):
76+
77+
```bash
78+
# 从项目根目录执行
79+
pnpm exec tsx bench/scripts/generate-ablation-docs.ts [options]
80+
```
81+
82+
| 参数 | 默认值 | 说明 |
83+
|---|---|---|
84+
| `--project` | `git` | 项目名(对应 `src/souko/doc/{project}`) |
85+
| `--doc-root` | `src/souko/doc` | ACCEED 文档根目录 |
86+
| `--out-root` | `bench/data/ablation-docs` | 输出目录 |
87+
| `--variants` | `full,no-edges,flat-md` | 逗号分隔的变体列表 |
88+
| `--overwrite` | 关闭 | 覆盖已有输出 |
89+
90+
输出结构示例:
91+
92+
```
93+
bench/data/ablation-docs/
94+
├── full/{project}/ ...
95+
├── no-edges/{project}/ ...
96+
└── flat-md/{project}/ ...
97+
```
98+
7399
## 目录结构
74100

75101
```
@@ -79,7 +105,8 @@ bench/
79105
├── vite.config.ts
80106
├── index.html
81107
├── scripts/
82-
│ └── generate-qa.ts # QA 生成脚本
108+
│ ├── generate-qa.ts # QA 生成脚本
109+
│ └── generate-ablation-docs.ts # 生成消融文档变体
83110
├── src/
84111
│ ├── main.ts
85112
│ ├── App.vue
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
import { cp, mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
2+
import path from "node:path";
3+
4+
type Variant = "full" | "no-edges" | "flat-md";
5+
6+
type Options = {
7+
project: string;
8+
docRoot: string;
9+
outRoot: string;
10+
variants: Variant[];
11+
overwrite: boolean;
12+
};
13+
14+
const VARIANTS: readonly Variant[] = ["full", "no-edges", "flat-md"];
15+
const SKIP_DIRS = new Set(["_pending"]);
16+
const SKIP_JSON = new Set(["flows.json"]);
17+
18+
function usage(): string {
19+
return [
20+
"Usage: pnpm exec tsx bench/scripts/generate-ablation-docs.ts [options]",
21+
"",
22+
"Creates documentation variants for ablation experiments:",
23+
" - full: original ACCEED docs",
24+
" - no-edges: remove typed edges from top/graph nodes (tree only)",
25+
" - flat-md: flatten all leaf markdown files into one directory",
26+
"",
27+
"Options:",
28+
" --project <name> Project name in doc root (default: git)",
29+
" --doc-root <path> ACCEED docs root (default: src/souko/doc)",
30+
" --out-root <path> Output root (default: bench/data/ablation-docs)",
31+
" --variants <list> Comma-separated: full,no-edges,flat-md",
32+
" --overwrite Replace existing outputs",
33+
" --help Show this help",
34+
].join("\n");
35+
}
36+
37+
function parseFlagMap(argv: string[]): Map<string, string> {
38+
const values = new Map<string, string>();
39+
let index = 0;
40+
while (index < argv.length) {
41+
const token = argv[index];
42+
if (token === undefined) break;
43+
if (!token.startsWith("--")) throw new Error(`Unexpected argument: ${token}`);
44+
const key = token.slice(2);
45+
const next = argv[index + 1];
46+
if (next === undefined || next.startsWith("--")) {
47+
values.set(key, "true");
48+
index += 1;
49+
} else {
50+
values.set(key, next);
51+
index += 2;
52+
}
53+
}
54+
return values;
55+
}
56+
57+
function splitList(value: string): string[] {
58+
return value.split(",").map((part) => part.trim()).filter((part) => part.length > 0);
59+
}
60+
61+
async function assertDirectory(dir: string, label: string): Promise<void> {
62+
const info = await stat(dir).catch(() => undefined);
63+
if (!info?.isDirectory()) throw new Error(`${label} is not a directory: ${dir}`);
64+
}
65+
66+
async function assertFile(filePath: string, label: string): Promise<void> {
67+
const info = await stat(filePath).catch(() => undefined);
68+
if (!info?.isFile()) throw new Error(`${label} is not a file: ${filePath}`);
69+
}
70+
71+
function parseArgs(argv: string[]): Options {
72+
const values = parseFlagMap(argv);
73+
if (values.has("help") || values.has("h")) {
74+
console.log(usage());
75+
process.exit(0);
76+
}
77+
78+
const variantsValue = values.get("variants") ?? VARIANTS.join(",");
79+
const variants = splitList(variantsValue);
80+
for (const variant of variants) {
81+
if (!VARIANTS.includes(variant as Variant)) {
82+
throw new Error(`Unknown variant: ${variant}`);
83+
}
84+
}
85+
86+
return {
87+
project: values.get("project") ?? "git",
88+
docRoot: path.resolve(values.get("doc-root") ?? "src/souko/doc"),
89+
outRoot: path.resolve(values.get("out-root") ?? "bench/data/ablation-docs"),
90+
variants: variants as Variant[],
91+
overwrite: values.has("overwrite"),
92+
};
93+
}
94+
95+
async function pathExists(filePath: string): Promise<boolean> {
96+
const info = await stat(filePath).catch(() => undefined);
97+
return info !== undefined;
98+
}
99+
100+
async function ensureEmptyTarget(dir: string, overwrite: boolean): Promise<void> {
101+
if (await pathExists(dir)) {
102+
if (!overwrite) {
103+
throw new Error(`Target already exists: ${dir} (use --overwrite to replace)`);
104+
}
105+
await rm(dir, { recursive: true, force: true });
106+
}
107+
await mkdir(dir, { recursive: true });
108+
}
109+
110+
async function removePendingDirs(root: string): Promise<void> {
111+
const entries = await readdir(root, { withFileTypes: true });
112+
await Promise.all(entries.map(async (entry) => {
113+
if (!entry.isDirectory()) return;
114+
const full = path.join(root, entry.name);
115+
if (SKIP_DIRS.has(entry.name)) {
116+
await rm(full, { recursive: true, force: true });
117+
return;
118+
}
119+
if (entry.name.startsWith(".")) return;
120+
await removePendingDirs(full);
121+
}));
122+
}
123+
124+
function stripEdgesFromNodes(raw: Record<string, unknown>): { next: Record<string, unknown>; changed: boolean } {
125+
if (!Array.isArray(raw.nodes)) return { next: raw, changed: false };
126+
let changed = false;
127+
const nextNodes = raw.nodes.map((node) => {
128+
if (!node || typeof node !== "object") return node;
129+
const hasEdges = Object.prototype.hasOwnProperty.call(node, "edges");
130+
const edges = (node as { edges?: unknown }).edges;
131+
if (!hasEdges || !Array.isArray(edges) || edges.length > 0) {
132+
changed = true;
133+
}
134+
return { ...(node as Record<string, unknown>), edges: [] };
135+
});
136+
return { next: { ...raw, nodes: nextNodes }, changed };
137+
}
138+
139+
async function stripEdgesInDir(root: string): Promise<number> {
140+
const entries = await readdir(root, { withFileTypes: true });
141+
let updates = 0;
142+
for (const entry of entries) {
143+
const full = path.join(root, entry.name);
144+
if (entry.isDirectory()) {
145+
if (entry.name.startsWith(".")) continue;
146+
if (SKIP_DIRS.has(entry.name)) continue;
147+
updates += await stripEdgesInDir(full);
148+
continue;
149+
}
150+
if (!entry.isFile() || !entry.name.endsWith(".json")) continue;
151+
if (SKIP_JSON.has(entry.name)) continue;
152+
153+
const raw = JSON.parse(await readFile(full, "utf-8")) as Record<string, unknown>;
154+
const { next, changed } = stripEdgesFromNodes(raw);
155+
if (changed) {
156+
await writeFile(full, JSON.stringify(next, null, 2), "utf-8");
157+
updates += 1;
158+
}
159+
}
160+
return updates;
161+
}
162+
163+
async function collectMdFiles(root: string, base: string, out: string[]): Promise<void> {
164+
const entries = await readdir(root, { withFileTypes: true });
165+
for (const entry of entries) {
166+
const full = path.join(root, entry.name);
167+
if (entry.isDirectory()) {
168+
if (entry.name.startsWith(".")) continue;
169+
if (SKIP_DIRS.has(entry.name)) continue;
170+
await collectMdFiles(full, base, out);
171+
continue;
172+
}
173+
if (entry.isFile() && entry.name.endsWith(".md")) {
174+
out.push(path.relative(base, full));
175+
}
176+
}
177+
}
178+
179+
function flattenName(relativePath: string, used: Map<string, number>): string {
180+
const noExt = relativePath.replace(/\.md$/i, "");
181+
const base = noExt.split(path.sep).join("__");
182+
const count = used.get(base) ?? 0;
183+
used.set(base, count + 1);
184+
return count === 0 ? `${base}.md` : `${base}__${count + 1}.md`;
185+
}
186+
187+
async function buildFlatMd(sourceProject: string, targetProject: string, project: string, overwrite: boolean): Promise<void> {
188+
const mdFiles: string[] = [];
189+
await collectMdFiles(sourceProject, sourceProject, mdFiles);
190+
if (mdFiles.length === 0) {
191+
throw new Error(`No markdown files found under ${sourceProject}`);
192+
}
193+
194+
await ensureEmptyTarget(targetProject, overwrite);
195+
196+
const used = new Map<string, number>();
197+
const manifest: { sourcePath: string; outputFile: string }[] = [];
198+
for (const rel of mdFiles) {
199+
const sourcePath = path.join(sourceProject, rel);
200+
const outputFile = flattenName(rel, used);
201+
const content = await readFile(sourcePath, "utf-8");
202+
await writeFile(path.join(targetProject, outputFile), content, "utf-8");
203+
manifest.push({ sourcePath: rel.split(path.sep).join("/"), outputFile });
204+
}
205+
206+
const manifestPath = path.join(targetProject, "manifest.json");
207+
const manifestContent = JSON.stringify({ project, entries: manifest }, null, 2);
208+
await writeFile(manifestPath, `${manifestContent}\n`, "utf-8");
209+
}
210+
211+
async function copyProject(sourceProject: string, targetProject: string, overwrite: boolean): Promise<void> {
212+
if (await pathExists(targetProject)) {
213+
if (!overwrite) {
214+
throw new Error(`Target already exists: ${targetProject} (use --overwrite to replace)`);
215+
}
216+
await rm(targetProject, { recursive: true, force: true });
217+
}
218+
await mkdir(path.dirname(targetProject), { recursive: true });
219+
await cp(sourceProject, targetProject, { recursive: true });
220+
await removePendingDirs(targetProject);
221+
}
222+
223+
async function main(): Promise<void> {
224+
const options = parseArgs(process.argv.slice(2));
225+
const sourceProject = path.join(options.docRoot, options.project);
226+
227+
await assertDirectory(options.docRoot, "Documentation root");
228+
await assertDirectory(sourceProject, "Project docs directory");
229+
await assertFile(path.join(sourceProject, "top.json"), "Project top.json");
230+
231+
for (const variant of options.variants) {
232+
const targetRoot = path.join(options.outRoot, variant);
233+
const targetProject = path.join(targetRoot, options.project);
234+
235+
if (variant === "flat-md") {
236+
await buildFlatMd(sourceProject, targetProject, options.project, options.overwrite);
237+
continue;
238+
}
239+
240+
await copyProject(sourceProject, targetProject, options.overwrite);
241+
242+
if (variant === "no-edges") {
243+
await stripEdgesInDir(targetProject);
244+
}
245+
}
246+
247+
const selected = options.variants.join(", ");
248+
console.log(`Ablation docs generated for variants: ${selected}`);
249+
console.log(`Output root: ${options.outRoot}`);
250+
}
251+
252+
main().catch((error) => {
253+
const message = error instanceof Error ? error.message : String(error);
254+
console.error(message);
255+
process.exit(1);
256+
});

0 commit comments

Comments
 (0)