Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/mgc/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ node_modules/
dist/
*.js.map
.env
docs-cache/
15 changes: 15 additions & 0 deletions packages/mgc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Add to your MCP client config:
| Variable | Description | Default |
|----------|-------------|---------|
| `MGC_CLI_PATH` | Custom path to the mgc binary | `mgc` |
| `MAGALU_DOCS_DIR` | Path to scraped Magalu docs (enables doc search tools) | — |

## Available Tools

Expand Down Expand Up @@ -68,6 +69,20 @@ Add to your MCP client config:
- **mgc_block_storage_volume_list** — List volumes
- **mgc_block_storage_volume_create** — Create a volume

### Documentation Search
- **search_magalu_docs** — Keyword (TF-IDF) search across Magalu developer docs (requires `MAGALU_DOCS_DIR`)
- **get_magalu_doc** — Get full markdown content of a doc page (use search first to find filepath)

#### Scraping docs

Use `docusaurus-to-md` to scrape the Magalu docs into a local directory:

```bash
npx @arvoretech/docusaurus-to-md https://dev.magalu.com -o ./docs-cache
```

Then set `MAGALU_DOCS_DIR` to the output path when running the MCP server.

## Development

```bash
Expand Down
175 changes: 175 additions & 0 deletions packages/mgc/src/docs-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import { readdir, readFile } from "node:fs/promises";
import { join, relative } from "node:path";

/** A single markdown document held in the in-memory index. */
interface IndexedDoc {
  /** Path relative to the docs directory; the key accepted by `getDocContent`. */
  filepath: string;
  /** URL from `_manifest.json` when available, otherwise derived from the path. */
  url: string;
  /** Text of the first `# ` heading, or "Untitled". */
  title: string;
  /** Full markdown content of the file. */
  content: string;
  /** Raw occurrence count per token. */
  terms: Map<string, number>;
  /** Total number of tokens in the document (denominator for term frequency). */
  termCount: number;
}

/**
 * In-memory TF-IDF index over a directory tree of markdown files.
 *
 * Call `load()` once (it is idempotent and safe to call concurrently), then
 * use `search()` for ranked keyword lookup and `getDocContent()` to fetch a
 * whole page by the relative filepath returned from `search()`.
 */
export class DocsIndex {
  private docs: IndexedDoc[] = [];
  private idf: Map<string, number> = new Map();
  private loaded = false;
  /** In-flight load, shared by concurrent callers so files are indexed only once. */
  private loading: Promise<void> | null = null;
  private docsDir: string;

  /**
   * @param docsDir Root directory that is scanned recursively for `.md` files.
   */
  constructor(docsDir: string) {
    this.docsDir = docsDir;
  }

  /**
   * Reads and indexes every markdown file under the docs directory.
   *
   * Subsequent calls after a successful load are no-ops. Calls made while a
   * load is still running await that same load instead of starting another
   * one, which would otherwise index every document twice.
   *
   * @throws Whatever `readFile` throws for an individual markdown file; the
   *   index is left untouched in that case so a later call can retry cleanly.
   */
  async load(): Promise<void> {
    if (this.loaded) return;

    if (!this.loading) {
      this.loading = this.buildIndex().finally(() => {
        // Clear the handle so a failed load can be retried later.
        this.loading = null;
      });
    }
    return this.loading;
  }

  /**
   * Ranks documents against `query` by summed TF-IDF, doubling the score when
   * any query term appears in the document title.
   *
   * @param query Free text; it is tokenized the same way as the documents.
   * @param maxResults Upper bound on returned hits (default 5). Non-positive
   *   or non-finite values yield an empty list.
   * @returns Hits sorted by descending score, each with a one-line snippet.
   */
  search(query: string, maxResults = 5): Array<{ url: string; title: string; snippet: string; score: number; filepath: string }> {
    const queryTerms = this.tokenize(query);
    if (!queryTerms.length) return [];

    // A negative limit would make `slice` drop items from the end instead of limiting.
    const limit = Number.isFinite(maxResults) ? Math.max(0, Math.floor(maxResults)) : 0;
    if (limit === 0) return [];

    const scores: Array<{ doc: IndexedDoc; score: number }> = [];

    for (const doc of this.docs) {
      let score = 0;
      for (const term of queryTerms) {
        const tf = (doc.terms.get(term) ?? 0) / Math.max(doc.termCount, 1);
        const idf = this.idf.get(term) ?? 0;
        score += tf * idf;
      }

      const lowerTitle = doc.title.toLowerCase();
      if (queryTerms.some((t) => lowerTitle.includes(t))) {
        score *= 2;
      }

      if (score > 0) {
        scores.push({ doc, score });
      }
    }

    scores.sort((a, b) => b.score - a.score);

    return scores.slice(0, limit).map(({ doc, score }) => ({
      url: doc.url,
      title: doc.title,
      snippet: this.extractSnippet(doc.content, queryTerms),
      score: Math.round(score * 10000) / 10000,
      filepath: doc.filepath,
    }));
  }

  /**
   * Returns the full markdown of an indexed document, or `null` if no indexed
   * document has exactly this relative filepath. The lookup is against the
   * in-memory index only; the filesystem is not touched.
   */
  getDocContent(filepath: string): string | null {
    const doc = this.docs.find((d) => d.filepath === filepath);
    return doc?.content ?? null;
  }

  /** Number of documents currently in the index. */
  get documentCount(): number {
    return this.docs.length;
  }

  /** Whether `load()` has completed successfully. */
  get isLoaded(): boolean {
    return this.loaded;
  }

  /** Scans, reads and indexes all files, publishing the result only on success. */
  private async buildIndex(): Promise<void> {
    const files = await this.findMarkdownFiles(this.docsDir);
    const manifestMap = await this.readManifest();

    // Build into a local array so a mid-way read failure cannot leave a
    // half-populated index that a retry would then duplicate.
    const docs: IndexedDoc[] = [];

    for (const file of files) {
      if (file.endsWith("_all.md")) continue;

      const content = await readFile(file, "utf-8");
      const rel = relative(this.docsDir, file);
      const tokens = this.tokenize(content);

      // The manifest key format is not visible here, so accept both the
      // joined path and the path relative to the docs directory.
      const url = manifestMap.get(file) || manifestMap.get(rel) || this.filepathToUrl(file);

      docs.push({
        filepath: rel,
        url,
        title: this.extractTitle(content),
        content,
        terms: this.computeTermFrequency(tokens),
        termCount: tokens.length,
      });
    }

    this.idf = this.computeIDF(docs);
    this.docs = docs;
    this.loaded = true;
    console.error(`Docs index loaded: ${this.docs.length} documents from ${this.docsDir}`);
  }

  /**
   * Reads `_manifest.json` (filepath -> url pairs) from the docs directory.
   * A missing, unreadable or malformed manifest yields an empty map; entries
   * that are not `{ filepath: string, url: string }` are skipped.
   */
  private async readManifest(): Promise<Map<string, string>> {
    const manifestMap = new Map<string, string>();
    try {
      const manifestRaw = await readFile(join(this.docsDir, "_manifest.json"), "utf-8");
      const parsed: unknown = JSON.parse(manifestRaw);
      if (!Array.isArray(parsed)) return manifestMap;

      for (const entry of parsed as unknown[]) {
        if (typeof entry !== "object" || entry === null) continue;
        const { filepath, url } = entry as { filepath?: unknown; url?: unknown };
        if (typeof filepath === "string" && typeof url === "string") {
          manifestMap.set(filepath, url);
        }
      }
    } catch (_e) { /* manifest may not exist */ }
    return manifestMap;
  }

  /** Lowercases, strips everything but letters/digits/whitespace, and drops tokens of 2 chars or fewer. */
  private tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .replace(/[^\p{L}\p{N}\s]/gu, " ")
      .split(/\s+/)
      .filter((t) => t.length > 2);
  }

  /** Counts occurrences of each token. */
  private computeTermFrequency(terms: string[]): Map<string, number> {
    const freq = new Map<string, number>();
    for (const term of terms) {
      freq.set(term, (freq.get(term) ?? 0) + 1);
    }
    return freq;
  }

  /** Computes `log(1 + N / df)` for every term appearing in `docs`. */
  private computeIDF(docs: IndexedDoc[]): Map<string, number> {
    const docFreq = new Map<string, number>();
    for (const doc of docs) {
      for (const term of doc.terms.keys()) {
        docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
      }
    }

    const idf = new Map<string, number>();
    const n = docs.length;
    for (const [term, df] of docFreq) {
      idf.set(term, Math.log(1 + n / df));
    }
    return idf;
  }

  /** Returns the text of the first level-1 markdown heading, or "Untitled". */
  private extractTitle(content: string): string {
    const match = content.match(/^#\s+(.+)$/m);
    return match?.[1]?.trim() || "Untitled";
  }

  /**
   * Returns the first non-blank line containing any query term (max 300
   * chars), falling back to the first three non-blank lines joined together.
   */
  private extractSnippet(content: string, queryTerms: string[]): string {
    const lines = content.split("\n").filter((l) => l.trim());
    const lower = queryTerms.map((t) => t.toLowerCase());

    for (const line of lines) {
      const lowerLine = line.toLowerCase();
      if (lower.some((t) => lowerLine.includes(t))) {
        return line.slice(0, 300);
      }
    }

    return lines.slice(0, 3).join(" ").slice(0, 300);
  }

  /** Recursively collects `.md` file paths under `dir`; unreadable directories contribute nothing. */
  private async findMarkdownFiles(dir: string): Promise<string[]> {
    const results: string[] = [];
    try {
      const entries = await readdir(dir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = join(dir, entry.name);
        if (entry.isDirectory()) {
          results.push(...(await this.findMarkdownFiles(fullPath)));
        } else if (entry.name.endsWith(".md")) {
          results.push(fullPath);
        }
      }
    } catch (_e) { /* directory may not exist */ }
    return results;
  }

  /** Derives a docs URL from a file path when the manifest has no entry for it. */
  private filepathToUrl(filepath: string): string {
    const rel = relative(this.docsDir, filepath)
      .replace(/\.md$/, "")
      // `relative` yields backslashes on Windows; URLs always use forward slashes.
      .replace(/\\/g, "/");
    return `https://dev.magalu.com/docs/${rel}`;
  }
}
33 changes: 32 additions & 1 deletion packages/mgc/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
DbListParamsSchema,
BlockStorageListParamsSchema,
BlockStorageCreateParamsSchema,
SearchDocsParamsSchema,
GetDocParamsSchema,
} from "./types.js";

export class MgcMCPServer {
Expand All @@ -30,7 +32,8 @@ export class MgcMCPServer {
});

const client = new MgcClient(mgcPath);
this.tools = new MgcTools(client);
const docsDir = process.env.MAGALU_DOCS_DIR;
this.tools = new MgcTools(client, docsDir);

this.setupTools();
}
Expand Down Expand Up @@ -384,6 +387,34 @@ export class MgcMCPServer {
return this.tools.blockStorageVolumeCreate(validated);
}
);

// Registers the documentation search tool. The backing index (DocsIndex in
// docs-index.ts) ranks pages by TF-IDF over tokenized keywords, so the
// description advertises keyword search rather than semantic search.
this.server.registerTool(
  "search_magalu_docs",
  {
    title: "Search Magalu Documentation",
    description:
      "Keyword (TF-IDF) search across Magalu Cloud developer documentation. Returns relevant doc pages with snippets and links. Requires MAGALU_DOCS_DIR env var pointing to scraped docs.",
    inputSchema: SearchDocsParamsSchema.shape,
  },
  async (params) => {
    // Re-validate so defaults (e.g. max_results) are applied before use.
    const validated = SearchDocsParamsSchema.parse(params);
    return this.tools.searchDocs(validated);
  }
);

// Registers the tool that returns a documentation page's full markdown.
// The handler validates the raw params against GetDocParamsSchema and then
// delegates to MgcTools.getDoc.
this.server.registerTool(
"get_magalu_doc",
{
title: "Get Magalu Doc Content",
description:
"Get the full markdown content of a specific Magalu documentation page by filepath. Use search_magalu_docs first to find the filepath.",
inputSchema: GetDocParamsSchema.shape,
},
async (params) => {
const validated = GetDocParamsSchema.parse(params);
return this.tools.getDoc(validated);
}
);
}

async start(): Promise<void> {
Expand Down
69 changes: 68 additions & 1 deletion packages/mgc/src/tools.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { MgcClient } from "./mgc-client.js";
import { DocsIndex } from "./docs-index.js";
import {
MgcExecuteParams,
VmListParams,
Expand All @@ -14,11 +15,19 @@ import {
DbListParams,
BlockStorageListParams,
BlockStorageCreateParams,
SearchDocsParams,
GetDocParams,
McpToolResult,
} from "./types.js";

export class MgcTools {
constructor(private client: MgcClient) {}
private docsIndex: DocsIndex | null = null;

/**
 * @param client Client used by the mgc CLI tools.
 * @param docsDir Optional directory of scraped docs. When it is missing or
 *   empty, no docs index is created and the doc tools report a
 *   configuration error.
 */
constructor(private client: MgcClient, docsDir?: string) {
  this.docsIndex = docsDir ? new DocsIndex(docsDir) : null;
}

private formatResult(
stdout: string,
Expand Down Expand Up @@ -297,4 +306,62 @@ export class MgcTools {
);
return this.formatResult(result.stdout, result.stderr, result.exitCode);
}

/**
 * Searches the docs index and returns the hits as a pretty-printed JSON
 * text result.
 *
 * Responds with an error result (never throws) when no docs directory was
 * configured or when loading/searching the index fails.
 */
async searchDocs(params: SearchDocsParams): Promise<McpToolResult> {
  // Every failure path produces the same envelope: JSON text plus isError.
  const failure = (message: string): McpToolResult => ({
    content: [{ type: "text", text: JSON.stringify({ error: message }, null, 2) }],
    isError: true,
  });

  const index = this.docsIndex;
  if (!index) {
    return failure("Docs index not configured. Set MAGALU_DOCS_DIR env var.");
  }

  try {
    await index.load();
    const results = index.search(params.query, params.max_results);
    const payload = {
      query: params.query,
      totalIndexed: index.documentCount,
      results,
    };

    return {
      content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    };
  } catch (error) {
    return failure(error instanceof Error ? error.message : String(error));
  }
}

/**
 * Returns the full markdown of one indexed documentation page.
 *
 * Responds with an error result (never throws) when no docs directory was
 * configured, when the filepath is not in the index, or when loading the
 * index fails.
 *
 * @param params `filepath` is the relative path reported by `searchDocs`.
 */
async getDoc(params: GetDocParams): Promise<McpToolResult> {
  if (!this.docsIndex) {
    return {
      content: [{ type: "text", text: JSON.stringify({ error: "Docs index not configured. Set MAGALU_DOCS_DIR env var." }, null, 2) }],
      isError: true,
    };
  }

  try {
    await this.docsIndex.load();
    const content = this.docsIndex.getDocContent(params.filepath);

    // Compare against null explicitly: an indexed but empty file has content
    // "" and must not be reported as missing.
    if (content === null) {
      return {
        content: [{ type: "text", text: JSON.stringify({ error: "Document not found", filepath: params.filepath }, null, 2) }],
        isError: true,
      };
    }

    return { content: [{ type: "text", text: content }] };
  } catch (error) {
    return {
      content: [{ type: "text", text: JSON.stringify({ error: error instanceof Error ? error.message : String(error) }, null, 2) }],
      isError: true,
    };
  }
}
}
11 changes: 11 additions & 0 deletions packages/mgc/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ export const BlockStorageCreateParamsSchema = z.object({
type: z.string().optional().describe("Volume type"),
});

/**
 * Input schema for the `search_magalu_docs` tool: a non-empty query string
 * and an optional positive-integer result limit that defaults to 5.
 */
export const SearchDocsParamsSchema = z.object({
query: z.string().min(1, "Query is required").describe("Search query in natural language to find relevant Magalu documentation"),
max_results: z.number().int().positive().optional().default(5).describe("Maximum number of results to return (default: 5)"),
});

/**
 * Input schema for the `get_magalu_doc` tool: the non-empty relative filepath
 * of a document, as reported in `search_magalu_docs` results.
 */
export const GetDocParamsSchema = z.object({
filepath: z.string().min(1, "Filepath is required").describe("Relative filepath of the document (from search_magalu_docs results)"),
});

export type MgcExecuteParams = z.infer<typeof MgcExecuteParamsSchema>;
export type VmListParams = z.infer<typeof VmListParamsSchema>;
export type VmCreateParams = z.infer<typeof VmCreateParamsSchema>;
Expand All @@ -135,6 +144,8 @@ export type KubernetesClusterCreateParams = z.infer<typeof KubernetesClusterCrea
export type DbListParams = z.infer<typeof DbListParamsSchema>;
export type BlockStorageListParams = z.infer<typeof BlockStorageListParamsSchema>;
export type BlockStorageCreateParams = z.infer<typeof BlockStorageCreateParamsSchema>;
/** Parsed parameter types for the documentation tools, inferred from their zod schemas. */
export type SearchDocsParams = z.infer<typeof SearchDocsParamsSchema>;
export type GetDocParams = z.infer<typeof GetDocParamsSchema>;

export interface McpToolResult {
[key: string]: unknown;
Expand Down
Loading