Skip to content

Commit 55f3142

Browse files
authored
fix(sync): avoid shell for scan subprocesses (#814)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 9862ef5 commit 55f3142

3 files changed

Lines changed: 213 additions & 8 deletions

File tree

SECURITY.md

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,71 @@
88

99
## Reporting a Vulnerability
1010

11-
Use this section to tell people how to report a vulnerability.
11+
If you find a vulnerability, please contact hello@basicmachines.co.
1212

13-
If you find a vulnerability, please contact hello@basicmachines.co
13+
Please do not open a public GitHub issue for security vulnerabilities. We aim
14+
to respond within 72 hours and will coordinate a fix and disclosure timeline
15+
with you.
16+
17+
## Threat Model
18+
19+
Basic Memory is a local-first MCP server that reads and writes markdown files
20+
inside configured project directories. It runs on your machine with your user
21+
permissions, so local configuration deserves the same care as any other
22+
developer tool that can access your files.
23+
24+
### What Basic Memory Controls
25+
26+
- Filesystem-touching tools validate paths against the configured project root
27+
with `validate_project_path()`, resolved paths, and `Path.is_relative_to()`.
28+
Path traversal attempts such as `../../etc/passwd` are blocked at this layer.
29+
- Scan optimizations in `sync_service.py` call `find` through
30+
`asyncio.create_subprocess_exec()` with explicit argument lists. Project paths
31+
are passed as data, not interpolated into shell strings.
32+
- Auto-update code uses hardcoded commands, list-form arguments, and
33+
`stdin=DEVNULL`. User-controlled strings do not reach a shell there.
34+
35+
### MCP Client-Side Risk
36+
37+
Recent MCP ecosystem research has highlighted a client-side pattern where an
38+
MCP host can be configured to run arbitrary commands as "servers." That risk is
39+
in the host configuration, not in notes or Basic Memory tool input.
40+
41+
The recommended Basic Memory MCP configuration uses a known command with
42+
explicit arguments:
43+
44+
```json
45+
{
46+
"mcpServers": {
47+
"basic-memory": {
48+
"command": "uvx",
49+
"args": ["basic-memory", "mcp"]
50+
}
51+
}
52+
}
53+
```
54+
55+
Only add MCP server entries from sources you trust. Avoid inline shell scripts
56+
or command strings copied from untrusted sources. Treat third-party MCP server
57+
configuration with the same scrutiny as any locally executed program.
58+
59+
Related ecosystem context:
60+
61+
- OX Security: The Mother of All AI Supply Chains
62+
- CSO Online: RCE by design: MCP architectural choice haunts AI agent ecosystem
63+
64+
### Out Of Scope
65+
66+
- Basic Memory does not execute note content as code. Notes are returned as
67+
data to the LLM.
68+
- Basic Memory does not open network ports by default. The MCP server uses
69+
stdio; the optional REST API is intended for localhost use.
70+
- Basic Memory is designed for single-user local knowledge bases and does not
71+
implement access controls between operating-system users.
72+
73+
## Secure Configuration Checklist
74+
75+
- MCP config `command` points to `uvx` or a trusted binary, not a shell string.
76+
- Project paths in Basic Memory config come from trusted local configuration.
77+
- If exposing the REST API, bind it only to localhost.
78+
- Review any third-party MCP servers before adding them to your host config.

src/basic_memory/sync/sync_service.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,12 +1530,36 @@ async def _quick_count_files(self, directory: Path) -> int:
15301530
count += 1
15311531
return count
15321532

1533-
process = await asyncio.create_subprocess_shell(
1534-
f'find "{directory}" -type f | wc -l',
1533+
# Trigger: large-project scan optimization needs the OS `find` command.
1534+
# Why: passing argv directly avoids shell interpretation of configured project paths.
1535+
# Outcome: quotes and shell metacharacters in paths are treated as data.
1536+
process = await asyncio.create_subprocess_exec(
1537+
"find",
1538+
str(directory),
1539+
"-type",
1540+
"f",
1541+
"-print0",
15351542
stdout=asyncio.subprocess.PIPE,
15361543
stderr=asyncio.subprocess.PIPE,
15371544
)
1538-
stdout, stderr = await process.communicate()
1545+
1546+
count = 0
1547+
stderr_task = None
1548+
if process.stderr is not None:
1549+
stderr_task = asyncio.create_task(process.stderr.read())
1550+
1551+
if process.stdout is None:
1552+
await process.wait()
1553+
else:
1554+
# Trigger: `find` can emit one path per file for very large projects.
1555+
# Why: collecting every path via communicate() scales memory with path bytes.
1556+
# Outcome: count null-delimited records in fixed-size chunks.
1557+
while chunk := await process.stdout.read(1024 * 1024):
1558+
count += chunk.count(b"\0")
1559+
1560+
await process.wait()
1561+
1562+
stderr = await stderr_task if stderr_task is not None else b""
15391563

15401564
if process.returncode != 0:
15411565
error_msg = stderr.decode().strip()
@@ -1550,7 +1574,7 @@ async def _quick_count_files(self, directory: Path) -> int:
15501574
count += 1
15511575
return count
15521576

1553-
return int(stdout.strip())
1577+
return count
15541578

15551579
async def _scan_directory_modified_since(
15561580
self, directory: Path, since_timestamp: float
@@ -1583,8 +1607,16 @@ async def _scan_directory_modified_since(
15831607
# Convert timestamp to find-compatible format
15841608
since_date = datetime.fromtimestamp(since_timestamp).strftime("%Y-%m-%d %H:%M:%S")
15851609

1586-
process = await asyncio.create_subprocess_shell(
1587-
f'find "{directory}" -type f -newermt "{since_date}"',
1610+
# Trigger: incremental scans ask `find` to filter by modification time.
1611+
# Why: passing argv directly avoids shell interpretation of paths and timestamps.
1612+
# Outcome: optimized scanning keeps its speed without a shell injection boundary.
1613+
process = await asyncio.create_subprocess_exec(
1614+
"find",
1615+
str(directory),
1616+
"-type",
1617+
"f",
1618+
"-newermt",
1619+
since_date,
15881620
stdout=asyncio.subprocess.PIPE,
15891621
stderr=asyncio.subprocess.PIPE,
15901622
)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""Tests for safe subprocess usage in sync scan optimizations."""
2+
3+
from datetime import datetime
4+
import sys
5+
6+
import pytest
7+
8+
import basic_memory.sync.sync_service as sync_service_module
9+
10+
11+
class _FakeProcess:
12+
def __init__(self, stdout: bytes, returncode: int = 0):
13+
self._stdout = stdout
14+
self.returncode = returncode
15+
16+
async def communicate(self):
17+
return self._stdout, b""
18+
19+
20+
class _FakeStream:
21+
def __init__(self, chunks: list[bytes]):
22+
self._chunks = chunks
23+
24+
async def read(self, _limit: int = -1):
25+
if not self._chunks:
26+
return b""
27+
return self._chunks.pop(0)
28+
29+
30+
class _FakeStreamingProcess:
31+
def __init__(self, stdout_chunks: list[bytes], returncode: int = 0):
32+
self.stdout = _FakeStream(stdout_chunks)
33+
self.stderr = _FakeStream([b""])
34+
self.returncode = returncode
35+
36+
async def wait(self):
37+
return self.returncode
38+
39+
async def communicate(self): # pragma: no cover - failure path for the assertion below
40+
raise AssertionError("_quick_count_files must stream stdout instead of buffering it")
41+
42+
43+
@pytest.mark.asyncio
44+
@pytest.mark.skipif(sys.platform == "win32", reason="Windows path uses Python scan fallback")
45+
async def test_quick_count_files_uses_exec_without_shell(monkeypatch, sync_service, tmp_path):
46+
"""Directory names with shell metacharacters must be passed as exec args."""
47+
directory = tmp_path / 'project "quoted"; echo unsafe'
48+
captured_args: list[tuple[str, ...]] = []
49+
50+
async def fail_if_shell_called(*args, **kwargs):
51+
raise AssertionError("create_subprocess_shell must not be used")
52+
53+
async def fake_create_subprocess_exec(*args, **kwargs):
54+
captured_args.append(args)
55+
return _FakeStreamingProcess(stdout_chunks=[b"/project/a.md\0", b"/project/b.md\0"])
56+
57+
monkeypatch.setattr(
58+
sync_service_module.asyncio, "create_subprocess_shell", fail_if_shell_called
59+
)
60+
monkeypatch.setattr(
61+
sync_service_module.asyncio,
62+
"create_subprocess_exec",
63+
fake_create_subprocess_exec,
64+
)
65+
66+
count = await sync_service._quick_count_files(directory)
67+
68+
assert count == 2
69+
assert captured_args == [("find", str(directory), "-type", "f", "-print0")]
70+
71+
72+
@pytest.mark.asyncio
73+
@pytest.mark.skipif(sys.platform == "win32", reason="Windows path uses Python scan fallback")
74+
async def test_scan_directory_modified_since_uses_exec_without_shell(
75+
monkeypatch,
76+
sync_service,
77+
tmp_path,
78+
):
79+
"""Incremental scan should pass the timestamp and directory as exec args."""
80+
directory = tmp_path / "project $(echo unsafe)"
81+
since_timestamp = 1_700_000_000.0
82+
since_date = datetime.fromtimestamp(since_timestamp).strftime("%Y-%m-%d %H:%M:%S")
83+
captured_args: list[tuple[str, ...]] = []
84+
85+
async def fail_if_shell_called(*args, **kwargs):
86+
raise AssertionError("create_subprocess_shell must not be used")
87+
88+
async def fake_create_subprocess_exec(*args, **kwargs):
89+
captured_args.append(args)
90+
return _FakeProcess(
91+
stdout=(f"{directory / 'notes' / 'a.md'}\n{directory / 'notes' / 'b.md'}\n").encode()
92+
)
93+
94+
monkeypatch.setattr(
95+
sync_service_module.asyncio, "create_subprocess_shell", fail_if_shell_called
96+
)
97+
monkeypatch.setattr(
98+
sync_service_module.asyncio,
99+
"create_subprocess_exec",
100+
fake_create_subprocess_exec,
101+
)
102+
103+
paths = await sync_service._scan_directory_modified_since(directory, since_timestamp)
104+
105+
assert paths == ["notes/a.md", "notes/b.md"]
106+
assert captured_args == [
107+
("find", str(directory), "-type", "f", "-newermt", since_date),
108+
]

0 commit comments

Comments
 (0)