Skip to content

Commit 5766baa

Browse files
buger and claude committed
feat: Add JSON-aware mermaid diagram validation
Fixes an issue where mermaid validation couldn't detect diagrams embedded in JSON string values. Previously, the regex pattern only matched literal markdown code blocks, missing diagrams with escaped newlines in JSON.

Changes:
- Add extractMermaidFromJson() to detect mermaid in JSON strings
- Update extractMermaidFromMarkdown() to auto-detect JSON responses
- Add replaceMermaidDiagramsInJson() for proper JSON diagram replacement
- Update replaceMermaidDiagramsInMarkdown() to handle both formats
- Add comprehensive test coverage for JSON mermaid extraction

The validation now works for diagrams in:
- Regular markdown code blocks (existing behavior)
- JSON string values with escaped newlines (new)
- JSON in code blocks (new)
- Nested JSON objects and arrays (new)

All 323 mermaid/schema tests pass with no regressions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 1585eb0 commit 5766baa

3 files changed

Lines changed: 428 additions & 11 deletions

File tree

npm/src/agent/schemaUtils.js

Lines changed: 170 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,76 @@ export function isMermaidSchema(schema) {
826826
return mermaidIndicators.some(indicator => indicator);
827827
}
828828

829+
/**
 * Extract Mermaid diagrams from JSON string values.
 *
 * The response is parsed as JSON (unwrapping a surrounding markdown code
 * fence when present) and every string value, at any nesting depth, is
 * scanned for fenced mermaid code blocks. Both real newlines and literal
 * backslash-n sequences (double-escaped JSON) are accepted after the fence
 * header, and literal backslash-n sequences inside the diagram body are
 * converted to real newlines.
 *
 * Candidate JSON texts are tried in this order, first successful parse wins:
 *   1. the content of a "```json" fence whose closing fence is on its own
 *      line, or else the content of an untagged fence wrapping an
 *      object/array, or else the whole trimmed response;
 *   2. the content of a "```json" fence matched case-insensitively whose
 *      closing fence may directly follow the closing brace/bracket.
 *
 * @param {string} response - Response that may contain JSON with mermaid blocks in string values
 * @returns {Object} - {diagrams: Array, jsonPaths: Array, parsedJson: Object|null}.
 *   Each diagram has: content, fullMatch, startIndex/endIndex (offsets inside
 *   the containing string value, not inside `response`), attributes,
 *   isInJson (always true) and jsonPath (dot-joined keys, array items as "[i]").
 *   When nothing can be parsed, all three fields are empty/null.
 */
export function extractMermaidFromJson(response) {
  if (!response || typeof response !== 'string') {
    return { diagrams: [], jsonPaths: [], parsedJson: null };
  }

  const trimmed = response.trim();
  const candidates = [];

  // Primary candidate: strict ```json fence, then an untagged fence around an
  // object/array, then the raw response.
  const jsonBlockMatch = trimmed.match(/```json\s*\n([\s\S]*?)\n```/);
  if (jsonBlockMatch) {
    candidates.push(jsonBlockMatch[1].trim());
  } else {
    const anyBlockMatch = trimmed.match(/```\s*\n([{\[][\s\S]*?[}\]])\s*```/);
    candidates.push(anyBlockMatch ? anyBlockMatch[1].trim() : trimmed);
  }

  // Fallback candidate: tolerate an upper/mixed-case "JSON" tag and a closing
  // fence that directly follows the final brace/bracket. Only consulted when
  // the primary candidate fails to parse.
  const looseBlockMatch = trimmed.match(/```json\s*\n([{\[][\s\S]*?[}\]])\s*```/i);
  if (looseBlockMatch) {
    const looseContent = looseBlockMatch[1].trim();
    if (!candidates.includes(looseContent)) {
      candidates.push(looseContent);
    }
  }

  let parsedJson;
  let didParse = false;
  for (const candidate of candidates) {
    try {
      parsedJson = JSON.parse(candidate);
      didParse = true;
      break;
    } catch (e) {
      // Not valid JSON - expected for non-JSON responses; try the next candidate.
    }
  }

  if (!didParse) {
    return { diagrams: [], jsonPaths: [], parsedJson: null };
  }

  const diagrams = [];
  const jsonPaths = [];

  // Recursively search for mermaid diagrams in JSON string values
  function searchObject(obj, path = []) {
    if (typeof obj === 'string') {
      // Fresh regex per string: the /g flag makes exec() stateful via lastIndex.
      // The header may be terminated by a real newline or a literal "\n".
      const mermaidPattern = /```mermaid([^\n`]*?)(?:\n|\\n)([\s\S]*?)```/gi;
      const joinedPath = path.join('.');
      let match;

      while ((match = mermaidPattern.exec(obj)) !== null) {
        const attributes = match[1] ? match[1].trim() : '';
        // Unescape the content (replace literal \n with actual newlines)
        const content = match[2].replace(/\\n/g, '\n');

        diagrams.push({
          content: content,
          fullMatch: match[0],
          startIndex: match.index,
          endIndex: match.index + match[0].length,
          attributes: attributes,
          isInJson: true,
          jsonPath: joinedPath
        });
        jsonPaths.push(joinedPath);
      }
    } else if (Array.isArray(obj)) {
      obj.forEach((item, index) => searchObject(item, [...path, `[${index}]`]));
    } else if (obj && typeof obj === 'object') {
      Object.entries(obj).forEach(([key, value]) => searchObject(value, [...path, key]));
    }
  }

  searchObject(parsedJson);

  return { diagrams, jsonPaths, parsedJson };
}
898+
829899
/**
830900
* Extract Mermaid diagrams from markdown code blocks with position tracking
831901
* @param {string} response - Response that may contain markdown with mermaid blocks
@@ -836,6 +906,16 @@ export function extractMermaidFromMarkdown(response) {
836906
return { diagrams: [], cleanedResponse: response };
837907
}
838908

909+
// First check if this looks like a JSON response - if so, use JSON-aware extraction
910+
const trimmed = response.trim();
911+
if ((trimmed.startsWith('{') || trimmed.startsWith('[')) ||
912+
trimmed.includes('```json')) {
913+
const jsonResult = extractMermaidFromJson(response);
914+
if (jsonResult.diagrams.length > 0) {
915+
return { diagrams: jsonResult.diagrams, cleanedResponse: response };
916+
}
917+
}
918+
839919
// Find all mermaid code blocks with enhanced regex to capture more variations
840920
// This regex captures optional attributes on same line as ```mermaid, and all diagram content
841921
const mermaidBlockRegex = /```mermaid([^\n]*)\n([\s\S]*?)```/gi;
@@ -854,17 +934,93 @@ export function extractMermaidFromMarkdown(response) {
854934
fullMatch: match[0],
855935
startIndex: match.index,
856936
endIndex: match.index + match[0].length,
857-
attributes: attributes
937+
attributes: attributes,
938+
isInJson: false
858939
});
859940
}
860941

861942
// Return cleaned response (original for now, could be modified if needed)
862943
return { diagrams, cleanedResponse: response };
863944
}
864945

946+
/**
 * Replace mermaid diagrams in JSON string values with corrected versions.
 *
 * The response is parsed via extractMermaidFromJson(); for each corrected
 * diagram flagged `isInJson` with a non-empty `jsonPath`, the string value at
 * that path has `diagram.fullMatch` replaced by a rebuilt mermaid code block.
 * The JSON is then re-serialised with 2-space indentation and, if the original
 * response wrapped the JSON in a code fence, put back inside that fence.
 *
 * Diagrams whose path does not resolve to a string value are skipped rather
 * than throwing.
 *
 * @param {string} originalResponse - Original response with JSON
 * @param {Array} correctedDiagrams - Array of corrected diagram objects with jsonPath
 *   (fields read: jsonPath, isInJson, fullMatch, attributes, content)
 * @returns {string} - Response with corrected diagrams properly escaped in JSON;
 *   the original response unchanged when it is empty/non-string, when there
 *   are no corrected diagrams, or when no JSON could be parsed from it
 */
export function replaceMermaidDiagramsInJson(originalResponse, correctedDiagrams) {
  if (!originalResponse || typeof originalResponse !== 'string') {
    return originalResponse;
  }

  if (!correctedDiagrams || correctedDiagrams.length === 0) {
    return originalResponse;
  }

  // Extract and parse JSON
  const jsonResult = extractMermaidFromJson(originalResponse);
  if (!jsonResult.parsedJson) {
    return originalResponse;
  }

  // Freshly parsed by the call above, so mutating it in place is safe.
  const modifiedJson = jsonResult.parsedJson;

  const hasOwn = (container, key) => Object.prototype.hasOwnProperty.call(container, key);

  // A "[i]" segment is an array index only when the container really is an
  // array; on a plain object it is an ordinary (if unusual) key.
  const toKey = (container, part) => {
    if (Array.isArray(container) && part.startsWith('[') && part.endsWith(']')) {
      return parseInt(part.slice(1, -1), 10);
    }
    return part;
  };

  // Replace diagrams in the JSON object
  for (const diagram of correctedDiagrams) {
    if (!diagram.jsonPath || !diagram.isInJson || typeof diagram.fullMatch !== 'string') {
      continue;
    }

    const pathParts = diagram.jsonPath.split('.').filter(p => p);
    if (pathParts.length === 0) {
      continue;
    }

    // Walk to the container that holds the target string, following only own
    // properties so a stale or malformed path is skipped instead of throwing.
    let container = modifiedJson;
    for (let i = 0; i < pathParts.length - 1; i++) {
      if (container === null || typeof container !== 'object') {
        container = undefined;
        break;
      }
      const key = toKey(container, pathParts[i]);
      container = hasOwn(container, key) ? container[key] : undefined;
    }

    if (container === null || typeof container !== 'object') {
      continue;
    }

    const lastKey = toKey(container, pathParts[pathParts.length - 1]);
    if (!hasOwn(container, lastKey)) {
      continue;
    }

    const originalString = container[lastKey];
    if (typeof originalString !== 'string') {
      continue;
    }

    const attributesStr = diagram.attributes ? ` ${diagram.attributes}` : '';
    const newCodeBlock = `\`\`\`mermaid${attributesStr}\n${diagram.content}\n\`\`\``;

    // fullMatch is the text exactly as it appears in the parsed string value.
    // A replacer function is used so "$&", "$1", "$$" etc. in the corrected
    // diagram are inserted literally instead of being expanded.
    container[lastKey] = originalString.replace(diagram.fullMatch, () => newCodeBlock);
  }

  // Reconstruct the response with modified JSON
  const modifiedJsonString = JSON.stringify(modifiedJson, null, 2);

  // Put the JSON back into the same kind of code fence it came from.
  // Replacer functions again keep "$" sequences in the JSON literal.
  const trimmed = originalResponse.trim();
  const jsonFence = /```json\s*\n([\s\S]*?)\n```/;
  const anyFence = /```\s*\n([{\[][\s\S]*?[}\]])\s*```/;
  const looseJsonFence = /```json\s*\n([{\[][\s\S]*?[}\]])\s*```/i;

  if (jsonFence.test(trimmed)) {
    return originalResponse.replace(jsonFence, () => `\`\`\`json\n${modifiedJsonString}\n\`\`\``);
  }
  if (anyFence.test(trimmed)) {
    return originalResponse.replace(anyFence, () => `\`\`\`\n${modifiedJsonString}\n\`\`\``);
  }
  if (looseJsonFence.test(trimmed)) {
    // Upper-case tag or closing fence glued to the brace: normalise the fence.
    return originalResponse.replace(looseJsonFence, () => `\`\`\`json\n${modifiedJsonString}\n\`\`\``);
  }

  return modifiedJsonString;
}
1019+
8651020
/**
8661021
* Replace mermaid diagrams in original markdown with corrected versions
867-
* @param {string} originalResponse - Original response with markdown
1022+
* Automatically detects JSON vs markdown format and uses appropriate replacement
1023+
* @param {string} originalResponse - Original response with markdown or JSON
8681024
* @param {Array} correctedDiagrams - Array of corrected diagram objects
8691025
* @returns {string} - Response with corrected diagrams in original format
8701026
*/
@@ -877,22 +1033,28 @@ export function replaceMermaidDiagramsInMarkdown(originalResponse, correctedDiag
8771033
return originalResponse;
8781034
}
8791035

1036+
// Check if any diagrams are in JSON format
1037+
const hasJsonDiagrams = correctedDiagrams.some(d => d.isInJson);
1038+
if (hasJsonDiagrams) {
1039+
return replaceMermaidDiagramsInJson(originalResponse, correctedDiagrams);
1040+
}
1041+
8801042
let modifiedResponse = originalResponse;
881-
1043+
8821044
// Sort diagrams by start index in reverse order to preserve indices during replacement
8831045
const sortedDiagrams = [...correctedDiagrams].sort((a, b) => b.startIndex - a.startIndex);
884-
1046+
8851047
for (const diagram of sortedDiagrams) {
8861048
// Reconstruct the code block with original attributes if they existed
8871049
const attributesStr = diagram.attributes ? ` ${diagram.attributes}` : '';
8881050
const newCodeBlock = `\`\`\`mermaid${attributesStr}\n${diagram.content}\n\`\`\``;
889-
1051+
8901052
// Replace the original code block
891-
modifiedResponse = modifiedResponse.slice(0, diagram.startIndex) +
892-
newCodeBlock +
1053+
modifiedResponse = modifiedResponse.slice(0, diagram.startIndex) +
1054+
newCodeBlock +
8931055
modifiedResponse.slice(diagram.endIndex);
8941056
}
895-
1057+
8961058
return modifiedResponse;
8971059
}
8981060

npm/tests/integration/validationFlow.test.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,12 @@ describe('Validation Flow Integration', () => {
4646
expect(isMermaidSchema(schema)).toBe(true);
4747
expect(isJsonSchema(schema)).toBe(true);
4848

49-
// Test Mermaid validation first - response contains diagram in JSON field, not in markdown blocks
49+
// Test Mermaid validation first - with JSON-aware extraction, diagrams in JSON strings are now found
5050
const mermaidValidation = await validateMermaidResponse(response);
51-
expect(mermaidValidation.isValid).toBe(false); // No mermaid code blocks found in this JSON structure
51+
expect(mermaidValidation.isValid).toBe(true); // Mermaid diagrams in JSON strings are now detected and validated
52+
expect(mermaidValidation.diagrams).toHaveLength(1);
53+
expect(mermaidValidation.diagrams[0].isInJson).toBe(true);
54+
expect(mermaidValidation.diagrams[0].diagramType).toBe('flowchart');
5255

5356
// Test JSON validation second
5457
const cleanedResponse = cleanSchemaResponse(response);

0 commit comments

Comments
 (0)