Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 107 additions & 41 deletions index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import { Experimental_Agent as Agent, hasToolCall, stepCountIs } from "ai";
import { experimental_createMCPClient as createMCPClient } from "./node_modules/@ai-sdk/mcp/dist/index.mjs";
import { Experimental_StdioMCPTransport as StdioMCPTransport } from "./node_modules/@ai-sdk/mcp/dist/mcp-stdio/index.mjs";
import { writeFileSync, mkdirSync, existsSync, readFileSync } from "node:fs";
import {
writeFileSync,
mkdirSync,
existsSync,
readFileSync,
rmSync,
} from "node:fs";
import { randomUUID } from "node:crypto";
import { join } from "node:path";
import pLimit from "p-limit";
import {
generateReport,
calculateUnitTestTotals,
Expand All @@ -15,10 +24,10 @@ import {
withRetry,
buildAgentPrompt,
simulateCacheSavings,
TestLogger,
} from "./lib/utils.ts";
import { discoverTests, type TestDefinition } from "./lib/test-discovery.ts";
import {
setupOutputsDirectory,
cleanupOutputsDirectory,
cleanupTestEnvironment,
runTestVerification,
Expand Down Expand Up @@ -52,6 +61,7 @@ interface SavedSettings {
mcpServerUrl?: string;
testingTool: boolean;
pricingEnabled: boolean;
concurrencyLimit?: number;
}

function loadSettings(): SavedSettings | null {
Expand Down Expand Up @@ -276,12 +286,31 @@ async function selectOptions() {
process.exit(0);
}

const concurrencyLimitInput = await text({
message: "Max concurrent tests? (0 = unlimited)",
initialValue: savedSettings?.concurrencyLimit?.toString() ?? "0",
validate: (value) => {
const num = parseInt(value ?? "0", 10);
if (isNaN(num) || num < 0) {
return "Please enter a non-negative number";
}
},
});

if (isCancel(concurrencyLimitInput)) {
cancel("Operation cancelled.");
process.exit(0);
}

const concurrencyLimit = parseInt(concurrencyLimitInput || "0", 10);

const newSettings: SavedSettings = {
models: selectedModels,
mcpIntegration: mcpIntegrationType,
mcpServerUrl: mcp,
testingTool,
pricingEnabled: pricing.enabled,
concurrencyLimit,
};
saveSettings(newSettings);

Expand All @@ -290,6 +319,7 @@ async function selectOptions() {
mcp,
testingTool,
pricing,
concurrencyLimit,
};
}

Expand All @@ -310,16 +340,19 @@ async function runSingleTest(
testComponentEnabled: boolean,
testIndex: number,
totalTests: number,
outputDir: string,
): Promise<SingleTestResult> {
console.log(`\n[${testIndex + 1}/${totalTests}] Running test: ${test.name}`);
console.log("─".repeat(50));
const logger = new TestLogger(test.name);
logger.log(`[${testIndex + 1}/${totalTests}] Running test: ${test.name}`);

const messages = buildAgentPrompt(test);

try {
const tools = {
ResultWrite: resultWriteTool,
...(testComponentEnabled && { TestComponent: testComponentTool(test) }),
...(testComponentEnabled && {
TestComponent: testComponentTool(test, outputDir),
}),
...(mcpClient ? await mcpClient.tools() : {}),
};

Expand All @@ -333,18 +366,18 @@ async function runSingleTest(
return;
}
stepCounter++;
console.log(` Step ${stepCounter}:`);
logger.log(` Step ${stepCounter}:`);
if (step.text) {
const preview =
step.text.length > 100
? step.text.slice(0, 100) + "..."
: step.text;
console.log(`💬 Text: ${preview}`);
logger.log(`💬 Text: ${preview}`);
}
if (step.toolCalls && step.toolCalls.length > 0) {
for (const call of step.toolCalls) {
if (call) {
console.log(`🔧 Tool call: ${call.toolName}`);
logger.log(`🔧 Tool call: ${call.toolName}`);
}
}
}
Expand All @@ -356,16 +389,16 @@ async function runSingleTest(
resultStr.length > 80
? resultStr.slice(0, 80) + "..."
: resultStr;
console.log(`📤 Tool result: ${preview}`);
logger.log(`📤 Tool result: ${preview}`);
}
}
}
},
});

console.log(" ⏳ Running agent...");
logger.log(" ⏳ Running agent...");
if (testComponentEnabled) {
console.log(" 📋 TestComponent tool is available");
logger.log(" 📋 TestComponent tool is available");
}

const result = await withRetry(async () => agent.generate({ messages }), {
Expand All @@ -377,7 +410,8 @@ async function runSingleTest(
const resultWriteContent = extractResultWriteContent(result.steps);

if (!resultWriteContent) {
console.log(" ⚠️ No ResultWrite output found");
logger.log(" ⚠️ No ResultWrite output found");
logger.flush();
const promptContent = messages[0]?.content;
const promptStr = promptContent
? typeof promptContent === "string"
Expand All @@ -402,42 +436,48 @@ async function runSingleTest(
};
}

console.log(" ✓ Component generated");
logger.log(" ✓ Component generated");

console.log(" ⏳ Verifying against tests...");
const verification = await runTestVerification(test, resultWriteContent);
logger.log(" ⏳ Verifying against tests...");
const verification = await runTestVerification(
test,
resultWriteContent,
outputDir,
);

if (verification.validation) {
if (verification.validation.valid) {
console.log(" ✓ Code validation passed");
logger.log(" ✓ Code validation passed");
} else {
console.log(" ✗ Code validation failed:");
logger.log(" ✗ Code validation failed:");
for (const error of verification.validation.errors) {
console.log(` - ${error}`);
logger.log(` - ${error}`);
}
}
}

if (verification.validationFailed) {
console.log(
logger.log(
` ⊘ Validation failed (${verification.numPassed}/${verification.numTests} tests passed)`,
);
} else if (verification.passed) {
console.log(
logger.log(
` ✓ All tests passed (${verification.numPassed}/${verification.numTests})`,
);
} else {
console.log(
logger.log(
` ✗ Tests failed (${verification.numFailed}/${verification.numTests} failed)`,
);
if (verification.failedTests) {
for (const ft of verification.failedTests) {
console.log(` - ${ft.fullName}`);
logger.log(` - ${ft.fullName}`);
}
}
}

cleanupTestEnvironment(test.name);
cleanupTestEnvironment(test.name, outputDir);

logger.flush();

const promptContent = messages[0]?.content;
if (!promptContent) {
Expand All @@ -456,7 +496,8 @@ async function runSingleTest(
verification,
};
} catch (error) {
console.error(` ✗ Error running test: ${error}`);
logger.log(` ✗ Error running test: ${error}`);
logger.flush();
const promptContent = messages[0]?.content;
const promptStr = promptContent
? typeof promptContent === "string"
Expand All @@ -482,7 +523,8 @@ async function runSingleTest(
}

async function main() {
const { models, mcp, testingTool, pricing } = await selectOptions();
const { models, mcp, testingTool, pricing, concurrencyLimit } =
await selectOptions();

const mcpServerUrl = mcp;
const mcpEnabled = !!mcp;
Expand Down Expand Up @@ -524,6 +566,10 @@ async function main() {
`🧪 TestComponent Tool: ${testComponentEnabled ? "Enabled" : "Disabled"}`,
);

console.log(
`⚡ Concurrency: ${concurrencyLimit === 0 ? "Unlimited" : concurrencyLimit}`,
);

console.log("\n📁 Discovering tests...");
const tests = discoverTests();
console.log(
Expand All @@ -535,7 +581,11 @@ async function main() {
process.exit(1);
}

setupOutputsDirectory();
// Create base outputs directory (individual tests will use unique subdirectories)
const baseOutputsDir = join(process.cwd(), "outputs");
if (!existsSync(baseOutputsDir)) {
mkdirSync(baseOutputsDir, { recursive: true });
}

let mcpClient = null;
if (mcpEnabled) {
Expand All @@ -557,6 +607,9 @@ async function main() {
}
}

// Set up the concurrency limiter; a configured limit of 0 means "unlimited" and maps to Infinity
const limit = pLimit(concurrencyLimit === 0 ? Infinity : concurrencyLimit);

let totalFailed = 0;

for (const modelId of models) {
Expand All @@ -575,22 +628,34 @@ async function main() {

const model = gateway.languageModel(modelId);

const testResults: SingleTestResult[] = [];
const startTime = Date.now();

for (let i = 0; i < tests.length; i++) {
const test = tests[i];
if (!test) continue;
const result = await runSingleTest(
test,
model,
mcpClient,
testComponentEnabled,
i,
tests.length,
);
testResults.push(result);
}
// Run tests concurrently (bounded by the concurrency limiter), each in its own uniquely named output directory
const testPromises = tests.map((test, i) =>
limit(async () => {
const uniqueOutputDir = join(baseOutputsDir, randomUUID());
mkdirSync(uniqueOutputDir, { recursive: true });

try {
return await runSingleTest(
test,
model,
mcpClient,
testComponentEnabled,
i,
tests.length,
uniqueOutputDir,
);
} finally {
// Always remove this test's unique directory, whether the test succeeded or threw
if (existsSync(uniqueOutputDir)) {
rmSync(uniqueOutputDir, { recursive: true, force: true });
}
}
}),
);

const testResults = await Promise.all(testPromises);

const totalDuration = Date.now() - startTime;

Expand Down Expand Up @@ -738,7 +803,8 @@ async function main() {
await generateReport(jsonPath, htmlPath);
}

cleanupOutputsDirectory();
// Clean up the base outputs directory
cleanupOutputsDirectory(baseOutputsDir);

process.exit(totalFailed > 0 ? 1 : 0);
}
Expand Down
Loading