Skip to content

Commit 2c6f0ee

Browse files
authored
Fix evals (#81)
* cleanup
* get-component-documentation only accepts a single component id
* fix versions
* use vitest cli instead of node for evals
* prefix experiment scripts so they are not picked up by turborepo
1 parent a9321a3 commit 2c6f0ee

File tree

17 files changed

+824
-949
lines changed

17 files changed

+824
-949
lines changed

eval/lib/agents/claude-code-cli.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,7 @@ function getTodoProgress(
285285
messages: ClaudeCodeStreamMessage[],
286286
): TodoProgress | null {
287287
// Find the most recent TodoWrite message
288-
for (let i = messages.length - 1; i >= 0; i--) {
289-
const message = messages[i];
288+
for (const message of messages.toReversed()) {
290289
if (message.type === 'assistant') {
291290
const todoWrite = message.message.content.find(
292291
(c): c is ToolUseContent =>

eval/lib/collect-args.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ export async function collectArgs() {
362362
const config: McpServerConfig = {
363363
[mcpServerName]: {
364364
type: 'stdio',
365-
command,
365+
command: command!,
366366
args: argsParts.length > 0 ? argsParts : undefined,
367367
},
368368
};

eval/lib/evaluations/build.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export async function build({
88
projectPath,
99
resultsPath,
1010
}: ExperimentArgs): Promise<boolean> {
11-
const result = await x('pnpm', ['build'], {
11+
const result = await x('pnpm', ['eval:build'], {
1212
nodeOptions: {
1313
cwd: projectPath,
1414
},

eval/lib/evaluations/prepare-evaluations.ts

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ export async function prepareEvaluations({
99
}: ExperimentArgs) {
1010
await addDevDependency(
1111
[
12-
'vitest@catalog:',
13-
'@vitest/browser-playwright@catalog:',
14-
'storybook@catalog:',
15-
'@storybook/addon-docs@catalog:',
16-
'@storybook/addon-a11y@catalog:',
17-
'@storybook/addon-vitest@catalog:',
18-
'@storybook/react-vite@catalog:',
19-
'eslint-plugin-storybook@catalog:',
12+
'vitest@catalog:experiments',
13+
'@vitest/browser-playwright@catalog:experiments',
14+
'storybook@catalog:experiments',
15+
'@storybook/addon-docs@catalog:experiments',
16+
'@storybook/addon-a11y@catalog:experiments',
17+
'@storybook/addon-vitest@catalog:experiments',
18+
'@storybook/react-vite@catalog:experiments',
19+
'eslint-plugin-storybook@catalog:experiments',
2020
],
2121
{ cwd: projectPath, silent: true },
2222
);
@@ -28,6 +28,20 @@ export async function prepareEvaluations({
2828
filter: (source) =>
2929
!source.includes('node_modules') && !source.includes('dist'),
3030
});
31+
32+
const { default: pkgJson } = await import(
33+
path.join(projectPath, 'package.json'),
34+
{
35+
with: { type: 'json' },
36+
}
37+
);
38+
// add the storybook script after agent execution, so it does not taint the experiment
39+
pkgJson.scripts.storybook = 'storybook dev --port 6006';
40+
await fs.writeFile(
41+
path.join(projectPath, 'package.json'),
42+
JSON.stringify(pkgJson, null, 2),
43+
);
44+
3145
await fs
3246
.cp(
3347
path.join(evalPath, 'expected', 'stories'),

eval/lib/evaluations/test-stories.ts

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,41 @@
1-
import { startVitest } from 'vitest/node';
21
import * as path from 'node:path';
32
import * as fs from 'node:fs/promises';
43
import type { EvaluationSummary, ExperimentArgs } from '../../types';
54
import type { JsonTestResults } from 'vitest/reporters';
5+
import { x } from 'tinyexec';
6+
import { dedent } from 'ts-dedent';
67

78
export async function testStories({
89
projectPath,
910
resultsPath,
1011
}: ExperimentArgs): Promise<Pick<EvaluationSummary, 'test' | 'a11y'>> {
1112
const testResultsPath = path.join(resultsPath, 'tests.json');
1213

13-
const vitest = await startVitest('test', undefined, {
14-
root: projectPath,
15-
watch: false,
16-
silent: true,
17-
reporters: ['json'],
18-
outputFile: testResultsPath,
14+
const result = await x('pnpm', ['eval:test'], {
15+
nodeOptions: {
16+
cwd: projectPath,
17+
},
1918
});
2019

21-
await vitest.close();
20+
await fs.writeFile(
21+
path.join(resultsPath, 'tests.md'),
22+
dedent`# Test Results
23+
24+
**Exit Code:** ${result.exitCode}
25+
26+
## stdout
27+
28+
\`\`\`sh
29+
${result.stdout}
30+
\`\`\`
31+
32+
## stderr
33+
34+
\`\`\`
35+
${result.stderr}
36+
\`\`\`
37+
`,
38+
);
2239

2340
const { default: jsonTestResults } = (await import(testResultsPath, {
2441
with: { type: 'json' },

eval/lib/evaluations/typecheck.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ if (import.meta.main) {
109109
}
110110
console.log({
111111
typeErrors: await checkTypes({
112-
projectPath: path.join(experimentPath[0], 'project'),
113-
resultsPath: path.join(experimentPath[0], 'results'),
112+
projectPath: path.join(experimentPath[0]!, 'project'),
113+
resultsPath: path.join(experimentPath[0]!, 'results'),
114114
} as ExperimentArgs),
115115
});
116116
}

eval/lib/save/chromatic.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export async function buildStorybook(
2222
let buildError = '';
2323

2424
try {
25-
await runScript('build-storybook', {
25+
await runScript('eval:build-storybook', {
2626
cwd: projectPath,
2727
silent: true,
2828
});

eval/lib/save/google-sheet.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ function getContextDetails(context: Context): string {
4444
const mcpConfig = Object.values(context.mcpServerConfig)[0];
4545
if (mcpConfig?.type === 'stdio' && mcpConfig.args) {
4646
const manifestIndex = mcpConfig.args.indexOf('--manifestPath');
47-
if (manifestIndex !== -1 && mcpConfig.args[manifestIndex + 1]) {
48-
return path.basename(mcpConfig.args[manifestIndex + 1]);
47+
const manifestIndexValue = mcpConfig.args[manifestIndex + 1];
48+
if (manifestIndex !== -1 && manifestIndexValue) {
49+
return path.basename(manifestIndexValue);
4950
}
5051
}
5152
return 'unknown manifest name';

eval/package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55
"description": "The project for evaluating UI component development with and without Storybook MCP",
66
"type": "module",
77
"scripts": {
8-
"build-storybook": "storybook build",
98
"eval": "node eval.ts",
10-
"storybook": "storybook dev -p 6006"
9+
"storybook": "storybook dev -p 6006",
10+
"typecheck": "tsc"
1111
},
1212
"devDependencies": {
1313
"@anthropic-ai/claude-agent-sdk": "^0.1.30",
1414
"@clack/prompts": "1.0.0-alpha.6",
1515
"@radix-ui/colors": "^3.0.0",
16-
"chromatic": "^13.3.3",
1716
"@radix-ui/react-popover": "^1.1.15",
1817
"@radix-ui/react-toggle": "^1.1.10",
1918
"@radix-ui/react-toggle-group": "^1.1.11",
@@ -24,9 +23,11 @@
2423
"@tsconfig/node24": "^24.0.1",
2524
"@types/envinfo": "^7.8.4",
2625
"@types/eslint": "^9.6.1",
26+
"@types/node": "^24.10.1",
2727
"@types/react": "^18.3.26",
2828
"@vitejs/plugin-react-swc": "^4.2.0",
2929
"ai-tokenizer": "^1.0.3",
30+
"chromatic": "^13.3.3",
3031
"envinfo": "^7.20.0",
3132
"eslint": "^9.36.0",
3233
"globals": "^16.4.0",

eval/templates/evaluation/eslint.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export default defineConfig([
1212
extends: [
1313
js.configs.recommended,
1414
tseslint.configs.recommendedTypeChecked,
15-
reactHooks.configs['recommended-latest'],
15+
reactHooks.configs.flat['recommended-latest'],
1616
reactRefresh.configs.vite,
1717
],
1818
languageOptions: {

0 commit comments

Comments
 (0)