Skip to content
Merged
3 changes: 0 additions & 3 deletions src/generators/metadata/constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,5 @@ export const DOC_API_HEADING_TYPES = [
},
];

// This regex is used to match basic TypeScript generic types (e.g., Promise<string>)
export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/;

// This is the base URL of the Man7 documentation
export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man';
23 changes: 23 additions & 0 deletions src/generators/metadata/utils/__tests__/transformers.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,27 @@ describe('transformTypeToReferenceLink', () => {
'[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt; & [`<Array>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;'
);
});

it('should transform a function returning a Generic type', () => {
strictEqual(
transformTypeToReferenceLink('(err: Error) => Promise<boolean>', {}),
'(err: Error) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)&gt;'
);
});

it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => {
strictEqual(
transformTypeToReferenceLink('string | number & boolean', {}),
'[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)'
);
});

it('should handle extreme nested combinations of functions, generics, unions, and intersections', () => {
const input =
'(str: MyType) => Promise<Map<string, number & string>, Map<string | number>>';
const expected =
'(str: MyType) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;, [`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt;&gt;';
Comment thread
avivkeller marked this conversation as resolved.
Outdated

strictEqual(transformTypeToReferenceLink(input, {}), expected);
});
});
111 changes: 3 additions & 108 deletions src/generators/metadata/utils/transformers.mjs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import {
DOC_MAN_BASE_URL,
DOC_API_HEADING_TYPES,
TYPE_GENERIC_REGEX,
} from '../constants.mjs';
import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs';
import { slug } from './slugger.mjs';
import { parseType } from './typeParser.mjs';
import { transformNodesToString } from '../../../utils/unist.mjs';
import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' };
import MDN_TYPE_MAP from '../maps/mdn.json' with { type: 'json' };
Expand All @@ -22,84 +19,7 @@ export const transformUnixManualToLink = (
) => {
return `[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`;
};
/**
 * Splits a type string on every `|` or `&` that sits outside of any `< >`
 * pair, and reports the separator that should be used to re-join the pieces.
 *
 * Only angle brackets influence the nesting depth. The returned pieces are
 * NOT trimmed, so callers are expected to trim them before use.
 *
 * Only the first top-level separator encountered is recorded: a string mixing
 * `|` and `&` at the top level is split on both, but `separator` reflects
 * whichever of the two appeared first. When no top-level separator exists,
 * `separator` is `undefined` and `pieces` holds the whole input.
 *
 * @param {string} str The type string to split
 * @returns {{ pieces: string[], separator: string | undefined }} The split pieces and the separator string used to join them (` | ` or ` & `)
 */
const splitByOuterSeparator = str => {
  const pieces = [];
  let current = '';
  let depth = 0;
  let separator;

  for (const char of str) {
    if (char === '<') {
      depth++;
    } else if (char === '>') {
      depth--;
    } else if ((char === '|' || char === '&') && depth === 0) {
      // Top-level separator: close the current piece and start a new one
      pieces.push(current);
      current = '';
      // Keep the first separator seen; later ones do not override it
      separator ??= ` ${char} `;
      continue;
    }

    current += char;
  }

  // Whatever is left after the last separator is the final piece
  pieces.push(current);
  return { pieces, separator };
};

/**
 * Formats a single-level generic type (e.g., Promise<string>) as Markdown.
 *
 * The piece is matched against TYPE_GENERIC_REGEX; when it does not match,
 * `null` is returned so the caller can fall back to plain-type handling.
 * The generic arguments are split on `|` and `,`, and those delimiters are
 * re-emitted as ` | ` and `, ` between the formatted arguments.
 *
 * @param {string} typePiece The plain type piece to be evaluated
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string|null} The formatted Markdown link, or null if no match is found
 */
const formatBasicGeneric = (typePiece, transformType) => {
  const found = typePiece.match(TYPE_GENERIC_REGEX);

  if (!found) {
    return null;
  }

  // Turns one type name into a Markdown link, or inline code when the name
  // cannot be resolved. A trailing `[]` is ignored for the lookup only.
  const toMarkdown = name => {
    const url = transformType(name.replace(/\[\]$/, ''));

    if (url) {
      return `[\`<${name}>\`](${url})`;
    }

    return `\`<${name}>\``;
  };

  const [, rawBase, rawInner] = found;

  // The base type is resolved before any of its arguments
  const head = toMarkdown(rawBase.trim());

  // The capturing group keeps the delimiters in the resulting token list
  const tokens = rawInner.trim().split(/([|,])/);

  let body = '';

  for (const token of tokens) {
    const text = token.trim();

    if (text === '|') {
      body += ' | ';
    } else if (text === ',') {
      body += ', ';
    } else {
      body += toMarkdown(text);
    }
  }

  return `${head}&lt;${body}&gt;`;
};
/**
* This method replaces plain text Types within the Markdown content into Markdown links
* that link to the actual relevant reference for such type (either internal or external link)
Expand Down Expand Up @@ -150,32 +70,7 @@ export const transformTypeToReferenceLink = (type, record) => {
return '';
};

const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput);

const typePieces = outerPieces.map(piece => {
// This is the content to render as the text of the Markdown link
const trimmedPiece = piece.trim();

// 1. Attempt to format as a basic Generic type first
const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType);
if (genericMarkdown) {
return genericMarkdown;
}

// 2. Fallback to the logic for plain types
// This is what we will compare against the API types mappings
// The ReGeX below is used to remove `[]` from the end of the type
const result = transformType(trimmedPiece.replace(/\[\]$/, ''));

// If we have a valid result and the piece is not empty, we return the Markdown link
if (trimmedPiece.length && result.length) {
return `[\`<${trimmedPiece}>\`](${result})`;
}
});

// Filter out pieces that we failed to map and then join the valid ones
// using the same separator that appeared in the original type string
const markdownLinks = typePieces.filter(Boolean).join(separator);
const markdownLinks = parseType(typeInput, transformType);

// Return the replaced links or the original content if they all failed to be replaced
// Note that if some failed to get replaced, only the valid ones will be returned
Expand Down
121 changes: 121 additions & 0 deletions src/generators/metadata/utils/typeParser.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/**
 * Splits a string on every occurrence of `separator` found at nesting depth 0,
 * i.e. outside of any `< >`, `( )`, `[ ]` or `{ }` pair.
 *
 * Square brackets and braces are tracked as well so that separators inside
 * tuple types (`[string, number]`) and object literal types
 * (`{ a: string | number }`) do not cut the type in half.
 *
 * The `>` of an arrow (`=>`) is never treated as a closing angle bracket.
 * Every returned piece is trimmed. An empty or non-string separator never
 * matches, so the whole (trimmed) input is returned as a single piece.
 *
 * @param {string} str The string to split
 * @param {string} separator The separator to split by (e.g., '|', '&', ',', '=>'); may be more than one character long
 * @returns {string[]} The split pieces
 */
const splitByOuterSeparator = (str, separator) => {
  const openers = '<([{';
  const closers = ')]}';

  // An empty separator would match at every index and never advance
  const canSplit = typeof separator === 'string' && separator.length > 0;

  const pieces = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    // Track depth using brackets, parentheses and braces
    if (openers.includes(char)) {
      depth++;
    } else if (
      closers.includes(char) ||
      // A `>` that is part of `=>` does not close a generic
      (char === '>' && str[i - 1] !== '=')
    ) {
      depth--;
    }

    if (depth === 0 && canSplit && str.startsWith(separator, i)) {
      pieces.push(current.trim());
      current = '';
      // Skip the remaining characters of a multi-character separator
      i += separator.length - 1;
      continue;
    }

    current += char;
  }

  pieces.push(current.trim());
  return pieces;
};
/**
 * Recursively parses advanced TypeScript types, including Unions,
 * Intersections, Functions, and Nested Generics, and renders them as Markdown.
 *
 * Constructs are tried in this order, each one only at the top nesting level:
 * unions (`|`), intersections (`&`), functions (`=>`), generics (`Base<...>`),
 * and finally a plain type name. Nested pieces that cannot be resolved are
 * rendered as inline code (`<Type>`) instead of a link.
 *
 * For function types only the return type is linked; the parameter list is
 * kept verbatim. A type with several top-level arrows (a function returning a
 * function) is split at the first arrow and the remainder is parsed
 * recursively as the return type.
 *
 * @param {string} typeString The plain type string to evaluate
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string|null} The formatted Markdown link(s), or null if the input is empty or is a plain type that doesn't map
 */
export const parseType = (typeString, transformType) => {
  const trimmed = typeString.trim();
  if (!trimmed) {
    return null;
  }

  // Parses a nested piece, falling back to inline code when it doesn't map
  const resolve = piece => parseType(piece, transformType) || `\`<${piece}>\``;

  // Handle Unions (|) before Intersections (&): `|` binds weaker than `&`,
  // so it has to be the outermost split
  for (const operator of ['|', '&']) {
    if (trimmed.includes(operator)) {
      const parts = splitByOuterSeparator(trimmed, operator);
      // A single part means the operator only appears inside brackets
      if (parts.length > 1) {
        return parts.map(piece => resolve(piece)).join(` ${operator} `);
      }
    }
  }

  // Handle Functions (=>)
  if (trimmed.includes('=>')) {
    const [params, ...rest] = splitByOuterSeparator(trimmed, '=>');
    if (rest.length > 0) {
      // Everything after the first arrow is the return type; re-joining the
      // remaining pieces keeps curried signatures such as
      // `(a: A) => (b: B) => C` intact for the recursive call
      const returnType = rest.join(' => ');

      // Preserve the function signature, just link the return type for now
      // (Mapping param types inside the signature string is complex and often unnecessary for simple docs)
      return `${params} =&gt; ${resolve(returnType)}`;
    }
  }

  // Handle Generics (Base<Inner, Inner>)
  if (trimmed.includes('<') && trimmed.endsWith('>')) {
    const firstBracketIndex = trimmed.indexOf('<');
    const baseType = trimmed.slice(0, firstBracketIndex).trim();
    const innerType = trimmed.slice(firstBracketIndex + 1, -1).trim();

    // A trailing `[]` on the base is ignored for the lookup only
    const baseResult = transformType(baseType.replace(/\[\]$/, ''));
    const baseFormatted = baseResult
      ? `[\`<${baseType}>\`](${baseResult})`
      : `\`<${baseType}>\``;

    // Split arguments safely by comma
    const innerFormatted = splitByOuterSeparator(innerType, ',')
      .map(arg => resolve(arg))
      .join(', ');

    return `${baseFormatted}&lt;${innerFormatted}&gt;`;
  }

  // Base Case: Plain Type (e.g., string, Buffer, Function)
  const result = transformType(trimmed.replace(/\[\]$/, ''));
  if (result) {
    return `[\`<${trimmed}>\`](${result})`;
  }

  return null;
};
Loading