Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Available beta endpoints:
- `PATCH /v1/memories/:id`
- `POST /v1/search`
- `POST /v1/context`
- `ALL /v1/mcp` (remote MCP recall — see below)
- `GET /v1/audit?projectId=<id>`

When `CLAUDE_MEM_AUTH_MODE=api-key`, send `Authorization: Bearer <key>`. Read endpoints require `memories:read`; write endpoints require `memories:write`.
Expand All @@ -38,3 +39,30 @@ always populated (or `null` only when generation was explicitly disabled).
The actual provider call happens in a separate BullMQ worker process
(`claude-mem server worker start`); the HTTP path never blocks on a
provider response.

## Remote MCP endpoint

`/v1/mcp` is a streamable-HTTP [MCP](https://modelcontextprotocol.io) server —
the secure, authenticated link a user pastes into Claude Code (or any MCP
client) to recall their cloud memory. It is read-only and authenticated by the
same API key as the REST routes (`memories:read`); every read is confined to the
key's team (and to its project, if the key is project-scoped).

Connect:

```bash
claude mcp add --transport http claude-mem <server-base>/v1/mcp \
--header "Authorization: Bearer cm_..."
```

Tools:

- `search` — `{ projectId, query, limit? }` → matching observations (FTS, same
path as `POST /v1/search`).
- `context` — `{ projectId, query, limit? }` → observations plus a concatenated
`context` string ready for prompt injection (same path as `POST /v1/context`).
- `recent` — `{ projectId, limit? }` → the newest observations for a project.

The transport is stateless: one MCP server + transport per request, so it needs
no session affinity behind a load balancer. Mutating tools are intentionally
absent — a pasted recall link cannot write.
148 changes: 148 additions & 0 deletions src/server/mcp/recall-mcp-server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// SPDX-License-Identifier: Apache-2.0
//
// Remote-recall MCP server factory.
//
// Builds a low-level MCP `Server` exposing the read tools (`search`, `context`,
// `recent`) over an injected `RecallBackend`. The backend is the only seam to
// storage, so this factory is pure and unit-testable without Postgres — the
// route layer (ServerV1PostgresRoutes) supplies a backend already scoped to the
// authenticated API key's team (and honoring any project scope).
//
// This is the same recall surface the stdio MCP server exposes via
// ServerBetaClient (`/v1/search`, `/v1/context`), so a hosted MCP link and the
// local CLI read identical data. The mutating tools are intentionally absent:
// a pasted recall link is read-only.

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
type CallToolResult,
type Tool,
} from '@modelcontextprotocol/sdk/types.js';

/**
 * The storage seam injected into `createRecallMcpServer`. The factory performs
 * every read through these two methods and nothing else, so a test double that
 * implements them is enough to exercise the whole tool surface.
 */
export interface RecallBackend {
// Returns serialized observations (already shaped by serializeObservation),
// scoped to the caller's team. Throws if `projectId` is outside the key's scope.
// `search` backs both the `search` and `context` tools; `limit` arrives already
// clamped by the factory.
search(args: { projectId: string; query: string; limit: number }): Promise<unknown[]>;
// Backs the `recent` tool: same contract as `search`, but takes no query.
recent(args: { projectId: string; limit: number }): Promise<unknown[]>;
}

/** Per-tool result-count policy: the value used when `limit` is omitted, and the hard ceiling. */
type LimitBounds = { default: number; max: number };

/** Bounds for the `search` tool. */
const SEARCH_LIMIT: LimitBounds = {
  default: 20,
  max: 100,
};

/** Bounds for the `context` tool. */
const CONTEXT_LIMIT: LimitBounds = {
  default: 10,
  max: 50,
};

/** Bounds for the `recent` tool. */
const RECENT_LIMIT: LimitBounds = {
  default: 20,
  max: 100,
};

/** JSON-schema fragment for a string argument carrying a human-readable description. */
const stringArg = (description: string) => ({ type: 'string', description });

/** JSON-schema fragment for the optional integer `limit` argument, capped at `max`. */
const limitArg = (max: number) => ({ type: 'integer', minimum: 1, maximum: max });

/**
 * Static catalog of the read-only recall tools advertised to MCP clients.
 * Each entry's `inputSchema` names the accepted arguments; `limit` is always
 * optional and its ceiling comes from the matching `*_LIMIT` constant.
 */
const TOOLS: Tool[] = [
  {
    name: 'search',
    description:
      'Full-text search your claude-mem memory for a project. Returns matching observations (most relevant first).',
    inputSchema: {
      type: 'object',
      properties: {
        projectId: stringArg('Project to search within.'),
        query: stringArg('Search query.'),
        limit: limitArg(SEARCH_LIMIT.max),
      },
      required: ['projectId', 'query'],
    },
  },
  {
    name: 'context',
    description:
      'Like search, but also returns a concatenated context string ready to inject into a prompt.',
    inputSchema: {
      type: 'object',
      properties: {
        projectId: stringArg('Project to search within.'),
        query: stringArg('Search query.'),
        limit: limitArg(CONTEXT_LIMIT.max),
      },
      required: ['projectId', 'query'],
    },
  },
  {
    name: 'recent',
    description: 'List the most recent observations for a project (newest first).',
    inputSchema: {
      type: 'object',
      properties: {
        projectId: stringArg('Project to list.'),
        limit: limitArg(RECENT_LIMIT.max),
      },
      required: ['projectId'],
    },
  },
];

/**
 * Normalize a caller-supplied `limit` into an integer within `[1, spec.max]`.
 *
 * @param raw - The untrusted `limit` argument as received from the client.
 * @param spec - Default and ceiling for the tool being called.
 * @returns `spec.default` when `raw` is not a finite number; otherwise `raw`
 *   truncated toward zero, raised to at least 1, and capped at `spec.max`.
 */
function clampLimit(raw: unknown, spec: { default: number; max: number }): number {
  if (typeof raw === 'number' && Number.isFinite(raw)) {
    const whole = Math.trunc(raw);
    const atLeastOne = Math.max(1, whole);
    return Math.min(atLeastOne, spec.max);
  }
  // Missing, non-numeric, NaN and ±Infinity all fall back to the default.
  return spec.default;
}

/**
 * Read a mandatory, non-blank string argument from a tool call.
 *
 * @param args - The raw `arguments` object of the tool call.
 * @param key - Name of the argument to read.
 * @returns The value exactly as supplied (it is validated, not trimmed).
 * @throws Error `"<key>" is required` when the argument is absent
 *   (`undefined` or `null`).
 * @throws Error `"<key>" must be a non-empty string` when a value was supplied
 *   but is not a string, or is empty / whitespace-only. Reporting this case as
 *   "required" would mislead a client that did send the argument.
 */
function requireString(args: Record<string, unknown>, key: string): string {
  const value = args[key];
  if (value === undefined || value === null) {
    throw new Error(`"${key}" is required`);
  }
  if (typeof value !== 'string' || value.trim().length === 0) {
    throw new Error(`"${key}" must be a non-empty string`);
  }
  return value;
}

/**
 * Wrap a payload as a successful tool result: a single text content item whose
 * text is the payload pretty-printed as JSON with two-space indentation.
 */
function jsonResult(payload: unknown): CallToolResult {
  const text = JSON.stringify(payload, null, 2);
  return { content: [{ type: 'text', text }] };
}

/**
 * Create a read-only recall MCP server whose tools delegate to `backend`.
 *
 * Two request handlers are registered: tool listing (returns the static
 * `TOOLS` catalog) and tool calls (`search`, `context`, `recent`). Transport
 * wiring is left to the caller (stdio in the CLI, streamable-HTTP in Server
 * Beta).
 *
 * Anything thrown while handling a tool call — a validation failure, a backend
 * rejection, or an unknown tool name — is returned as an `isError` tool result
 * carrying the error message rather than propagating out of the handler.
 *
 * @param backend - Storage seam used for every read.
 * @param version - Version string reported in the server's identity.
 * @returns The configured `Server`; it is not yet connected to a transport.
 */
export function createRecallMcpServer(backend: RecallBackend, version: string): Server {
  const server = new Server({ name: 'claude-mem', version }, { capabilities: { tools: {} } });

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

  server.setRequestHandler(CallToolRequestSchema, async (request): Promise<CallToolResult> => {
    const { name } = request.params;
    const args = (request.params.arguments ?? {}) as Record<string, unknown>;
    try {
      switch (name) {
        case 'search': {
          const projectId = requireString(args, 'projectId');
          const query = requireString(args, 'query');
          const limit = clampLimit(args.limit, SEARCH_LIMIT);
          const observations = await backend.search({ projectId, query, limit });
          return jsonResult({ observations });
        }
        case 'context': {
          const projectId = requireString(args, 'projectId');
          const query = requireString(args, 'query');
          const limit = clampLimit(args.limit, CONTEXT_LIMIT);
          const observations = await backend.search({ projectId, query, limit });
          // Only non-empty string `content` fields contribute to the packed context.
          const snippets: string[] = [];
          observations.forEach((entry) => {
            const text = (entry as { content?: unknown }).content;
            if (typeof text === 'string' && text.length > 0) {
              snippets.push(text);
            }
          });
          return jsonResult({ observations, context: snippets.join('\n\n') });
        }
        case 'recent': {
          const projectId = requireString(args, 'projectId');
          const limit = clampLimit(args.limit, RECENT_LIMIT);
          const observations = await backend.recent({ projectId, limit });
          return jsonResult({ observations });
        }
        default:
          throw new Error(`Unknown tool: ${name}`);
      }
    } catch (error) {
      const text = error instanceof Error ? error.message : String(error);
      return { isError: true, content: [{ type: 'text', text }] };
    }
  });

  return server;
}

/**
 * Public alias for the tool catalog. This is the same array instance the
 * list-tools handler returns, not a copy.
 */
export const RECALL_MCP_TOOLS = TOOLS;
45 changes: 45 additions & 0 deletions src/server/routes/v1/ServerV1PostgresRoutes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import { PostgresObservationRepository } from '../../../storage/postgres/observa
import { logger } from '../../../utils/logger.js';
import { requirePostgresServerAuth } from '../../middleware/postgres-auth.js';
import { requestIdMiddleware } from '../../middleware/request-id.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { createRecallMcpServer, type RecallBackend } from '../../mcp/recall-mcp-server.js';
import type { ActiveServerBetaQueueManager } from '../../runtime/ActiveServerBetaQueueManager.js';
import type { ServerBetaQueueManager } from '../../runtime/types.js';
import { PostgresServerSessionsRepository } from '../../../storage/postgres/server-sessions.js';
Expand All @@ -29,6 +31,10 @@ import { EndSessionService } from '../../services/EndSessionService.js';

const SOURCE_ADAPTER_DEFAULT = 'api';

// Ambient declaration only — no value is defined in this file.
// NOTE(review): presumably substituted at build time by the bundler; confirm
// against the build configuration.
declare const __DEFAULT_PACKAGE_VERSION__: string;
// Version passed to createRecallMcpServer for the /v1/mcp endpoint. The `typeof`
// guard avoids a ReferenceError when the identifier was never defined, falling
// back to a dev placeholder.
const MCP_SERVER_VERSION =
typeof __DEFAULT_PACKAGE_VERSION__ !== 'undefined' ? __DEFAULT_PACKAGE_VERSION__ : '0.0.0-dev';

export interface ServerV1PostgresRoutesOptions {
pool: PostgresPool;
queueManager: ServerBetaQueueManager;
Expand Down Expand Up @@ -926,6 +932,45 @@ export class ServerV1PostgresRoutes implements RouteHandler {
}
},
));

// Remote authenticated MCP endpoint. The "secure MCP link" a user pastes
// into Claude Code (or any MCP client) to recall their cloud memory:
// claude mcp add --transport http claude-mem <base>/v1/mcp \
// --header "Authorization: Bearer cm_..."
// Same readAuth (memories:read) + team/project scoping as /v1/search, so it
// reads identical data through identical guards. Stateless streamable-HTTP:
// one transport + server per request, bound to this key's team.
app.all('/v1/mcp', readAuth, this.asyncHandler(async (req, res) => {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 app.all exposes the MCP endpoint to all HTTP methods

The MCP streamable-HTTP protocol only uses POST (JSON-RPC) and GET (SSE). Using app.all additionally routes DELETE, PUT, PATCH, HEAD, and OPTIONS through the full auth middleware before the transport can reject them. CORS preflight OPTIONS requests will fail with 401 because browsers omit the Authorization header on preflights — silently breaking any browser-based MCP client. Restricting to the two methods the protocol actually uses is safer.

Suggested change
app.all('/v1/mcp', readAuth, this.asyncHandler(async (req, res) => {
app.route('/v1/mcp')
.get(readAuth, this.asyncHandler(async (req, res) => { await handleMcp(req, res); }))
.post(readAuth, this.asyncHandler(async (req, res) => { await handleMcp(req, res); }));

// Resolve the caller's team; a falsy result ends the request here.
// NOTE(review): presumably requireTeamId has already written the error
// response in that case — confirm.
const teamId = this.requireTeamId(req, res);
if (!teamId) return;
// Project restriction carried by the auth context, or null when there is none.
const projectScope = req.authContext?.projectId ?? null;
const repo = new PostgresObservationRepository(this.options.pool);
// Rejects any projectId other than the one the key is scoped to. The throw
// happens inside a RecallBackend call, so createRecallMcpServer reports it to
// the client as an `isError` tool result rather than as an HTTP error.
const assertProjectAllowed = (projectId: string): void => {
if (projectScope && projectScope !== projectId) {
throw new Error('API key is scoped to a different project');
}
};
// Backend handed to the MCP server: the project check runs before the
// repository is touched, and every query carries `teamId`.
// NOTE(review): no auditRead call is made on this path — confirm whether MCP
// reads should be audited like the REST read routes.
const backend: RecallBackend = {
search: async ({ projectId, query, limit }) => {
assertProjectAllowed(projectId);
const rows = await repo.search({ projectId, teamId, query, limit });
return rows.map(serializeObservation);
},
recent: async ({ projectId, limit }) => {
assertProjectAllowed(projectId);
const rows = await repo.listByProject({ projectId, teamId, limit });
return rows.map(serializeObservation);
},
};
// A fresh server + transport is built for every request; per the MCP SDK,
// `sessionIdGenerator: undefined` selects stateless mode (no session ids).
const server = createRecallMcpServer(backend, MCP_SERVER_VERSION);
const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined });
// Tear both down once the response closes; `void` marks the close calls as
// intentionally not awaited.
res.on('close', () => {
void transport.close();
void server.close();
});
await server.connect(transport);
// Hands the request, the response, and `req.body` to the transport.
await transport.handleRequest(req, res, req.body);
}));
Comment on lines +943 to +973

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Audit logging is absent from the MCP read path

/v1/search and /v1/context both call this.auditRead(...) immediately after a successful Postgres query, recording the actor, project, query, and returned observation IDs. The /v1/mcp handler skips this entirely. Because the backend closures (search / recent) are called deep inside the MCP SDK dispatch loop — not from this handler — there is no natural place to attach a post-query hook without threading audit context down into RecallBackend. As a result every observation read through the MCP endpoint leaves no audit trail, even though the PR description explicitly states it "reads identical data through identical guards."

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

}

private async auditRead(
Expand Down
113 changes: 113 additions & 0 deletions tests/server/mcp/recall-mcp-server.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// SPDX-License-Identifier: Apache-2.0
//
// Unit tests for the remote-recall MCP server factory. The factory is pure
// (storage is injected as a RecallBackend), so these run with no Postgres —
// they drive a real MCP Client over an in-memory transport, exactly how a
// hosted client would, and assert tool listing, arg forwarding/clamping,
// context packing, and that backend failures surface as tool errors (not
// transport throws).

import { describe, it, expect } from 'bun:test';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js';
import { createRecallMcpServer, type RecallBackend } from '../../../src/server/mcp/recall-mcp-server.js';

// Arguments captured from each backend call, in call order, so a test can
// assert exactly what the MCP server forwarded to storage.
interface Recorded {
// One entry per `backend.search` invocation made by the default fake.
search: Array<{ projectId: string; query: string; limit: number }>;
// One entry per `backend.recent` invocation made by the default fake.
recent: Array<{ projectId: string; limit: number }>;
}

/**
 * Build a fake `RecallBackend` plus a log of the arguments it received.
 *
 * The default `search` returns two canned observations and the default
 * `recent` returns one; both record their arguments in `calls`. Any method
 * given in `overrides` replaces the default (and therefore records nothing).
 */
function makeBackend(overrides: Partial<RecallBackend> = {}): { backend: RecallBackend; calls: Recorded } {
  const calls: Recorded = { search: [], recent: [] };

  const recordingSearch: RecallBackend['search'] = async (args) => {
    calls.search.push(args);
    return [
      { id: 'o1', content: 'alpha' },
      { id: 'o2', content: 'beta' },
    ];
  };

  const recordingRecent: RecallBackend['recent'] = async (args) => {
    calls.recent.push(args);
    return [{ id: 'r1', content: 'recent-one' }];
  };

  const backend: RecallBackend = {
    search: recordingSearch,
    recent: recordingRecent,
    ...overrides,
  };
  return { backend, calls };
}

/**
 * Start a recall MCP server over `backend` and return a client already
 * connected to it through a linked in-memory transport pair.
 */
async function connectClient(backend: RecallBackend): Promise<Client> {
  const [clientSide, serverSide] = InMemoryTransport.createLinkedPair();
  const recallServer = createRecallMcpServer(backend, '9.9.9');
  const mcpClient = new Client({ name: 'test-client', version: '0' }, { capabilities: {} });

  // Both ends are connected concurrently.
  await Promise.all([recallServer.connect(serverSide), mcpClient.connect(clientSide)]);
  return mcpClient;
}

/**
 * Return the `text` of the first content item of a tool result, or `''` when
 * there is no first item or it has no text.
 */
function textOf(result: { content: unknown }): string {
  const blocks = result.content as Array<{ type: string; text?: string }>;
  const head = blocks[0];
  if (head == null) {
    return '';
  }
  return head.text ?? '';
}

describe('createRecallMcpServer', () => {
  // Catalog: only the three read tools are advertised.
  it('lists exactly the read-only recall tools', async () => {
    const { backend } = makeBackend();
    const client = await connectClient(backend);
    const listing = await client.listTools();
    const names = listing.tools.map((tool) => tool.name);
    names.sort();
    expect(names).toEqual(['context', 'recent', 'search']);
    await client.close();
  });

  // An oversized limit must reach the backend already capped at the search maximum.
  it('search forwards args, clamps the limit, and returns observations', async () => {
    const harness = makeBackend();
    const client = await connectClient(harness.backend);
    const result = await client.callTool({
      name: 'search',
      arguments: { projectId: 'p1', query: 'hello', limit: 9999 },
    });
    const forwarded = harness.calls.search[0];
    expect(forwarded).toEqual({ projectId: 'p1', query: 'hello', limit: 100 });
    const payload = JSON.parse(textOf(result));
    expect(payload.observations).toHaveLength(2);
    await client.close();
  });

  // The fake backend's two observation contents are joined with a blank line.
  it('context packs the observation contents into a joined string', async () => {
    const { backend } = makeBackend();
    const client = await connectClient(backend);
    const result = await client.callTool({
      name: 'context',
      arguments: { projectId: 'p1', query: 'hi' },
    });
    const payload = JSON.parse(textOf(result));
    expect(payload.context).toBe('alpha\n\nbeta');
    await client.close();
  });

  // With no limit supplied, the recent tool's default applies.
  it('recent calls the recent backend with the default limit', async () => {
    const harness = makeBackend();
    const client = await connectClient(harness.backend);
    await client.callTool({ name: 'recent', arguments: { projectId: 'p2' } });
    const forwarded = harness.calls.recent[0];
    expect(forwarded).toEqual({ projectId: 'p2', limit: 20 });
    await client.close();
  });

  // `query` is omitted: the call resolves with isError instead of rejecting.
  it('a missing required arg is a tool error, not a transport throw', async () => {
    const { backend } = makeBackend();
    const client = await connectClient(backend);
    const result = await client.callTool({ name: 'search', arguments: { projectId: 'p1' } });
    expect(result.isError).toBe(true);
    await client.close();
  });

  // A throwing backend is reported through the tool result, message included.
  it('a backend project-scope rejection surfaces as a tool error', async () => {
    const rejectingSearch = async (): Promise<unknown[]> => {
      throw new Error('API key is scoped to a different project');
    };
    const { backend } = makeBackend({ search: rejectingSearch });
    const client = await connectClient(backend);
    const result = await client.callTool({
      name: 'search',
      arguments: { projectId: 'other', query: 'x' },
    });
    expect(result.isError).toBe(true);
    expect(textOf(result)).toContain('different project');
    await client.close();
  });

  // Tool names outside the catalog are rejected the same way.
  it('an unknown tool is a tool error', async () => {
    const { backend } = makeBackend();
    const client = await connectClient(backend);
    const result = await client.callTool({ name: 'nope', arguments: {} });
    expect(result.isError).toBe(true);
    await client.close();
  });
});
Loading