Skip to content

Commit b7aac9e

Browse files
committed
Fix AI-05 false positives on Node.js bundles, add e2e test suite
AI-05 flagged all non-entry-point .js files in compiled output directories (build/, dist/) as unexpected executables. Added Node.js entry point directory resolution, mirroring the existing Python module resolution approach.

- Add _add_node_entry_point_files() to resolve build directory tree
- Guard against root-level entry points to prevent over-whitelisting
- Add 3 unit tests for Node.js multi-file builds
- Add e2e test suite (test_e2e_bundles.py) scanning real registry bundles: finnhub, folk, nationalparks
- Bump version to 0.2.4
1 parent 43bf8a7 commit b7aac9e

9 files changed

Lines changed: 267 additions & 7 deletions

File tree

apps/scanner/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ dmypy.json
5151
*.mcpb
5252
/tmp/
5353

54+
# Downloaded test bundles (e2e tests)
55+
/tests/data/
56+
5457
# Environment
5558
.env
5659
.env.*

apps/scanner/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ RUN curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main
2020
RUN npm install -g eslint eslint-plugin-security --no-fund --no-audit
2121

2222
# mpak-scanner + Python security tools (bandit, guarddog)
23-
RUN pip install --no-cache-dir "mpak-scanner[job]==0.2.3" bandit guarddog
23+
RUN pip install --no-cache-dir "mpak-scanner[job]==0.2.4" bandit guarddog
2424

2525
ENTRYPOINT ["mpak-scanner"]
2626
CMD ["job"]

apps/scanner/pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "mpak-scanner"
7-
version = "0.2.3"
7+
version = "0.2.4"
88
description = "Security scanner for MCP bundles. Powers mpak Certified verification."
99
readme = "README.md"
1010
license = "Apache-2.0"
@@ -92,6 +92,9 @@ python-version = "3.13"
9292

9393
[tool.pytest.ini_options]
9494
testpaths = ["tests"]
95+
markers = [
96+
"e2e: end-to-end tests against real bundles from the registry (requires tests/data/)",
97+
]
9598

9699
[dependency-groups]
97100
dev = [

apps/scanner/src/mpak_scanner/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
from mpak_scanner.models import ComplianceLevel, ControlResult, SecurityReport
44
from mpak_scanner.scanner import scan_bundle
55

6-
__version__ = "0.2.3"
6+
__version__ = "0.2.4"
77
__all__ = ["scan_bundle", "SecurityReport", "ControlResult", "ComplianceLevel"]

apps/scanner/src/mpak_scanner/controls/artifact_integrity/ai05_bundle_completeness.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,10 @@ def _build_referenced_files(self, manifest: dict[str, Any], bundle_dir: Path | N
227227
cleaned = arg.replace("${__dirname}/", "")
228228
referenced.add(cleaned)
229229

230-
# _meta.org.mpaktrust metadata (non-executable, always allowed)
231-
# These don't need to be in the referenced set since they're not executable
230+
# Node.js entry point directory resolution
231+
server_type = server.get("type", "") if isinstance(server, dict) else ""
232+
if server_type == "node" and entry_point and bundle_dir:
233+
self._add_node_entry_point_files(entry_point, bundle_dir, referenced)
232234

233235
return referenced
234236

@@ -252,6 +254,27 @@ def _add_python_module_files(self, module_name: str, bundle_dir: Path, reference
252254
if f.is_file():
253255
referenced.add(str(f.relative_to(bundle_dir)).replace("\\", "/"))
254256

257+
def _add_node_entry_point_files(self, entry_point: str, bundle_dir: Path, referenced: set[str]) -> None:
258+
"""Resolve a Node.js entry point to its sibling modules.
259+
260+
TypeScript compiles to a directory (build/, dist/) where the entry
261+
point imports other .js files. Add all files in the entry point's
262+
directory tree as referenced.
263+
"""
264+
entry_path = bundle_dir / entry_point
265+
if not entry_path.exists():
266+
return
267+
268+
# Add all files in the entry point's parent directory tree
269+
entry_dir = entry_path.parent
270+
if entry_dir == bundle_dir:
271+
# Entry point is at root; don't add everything
272+
return
273+
274+
for f in entry_dir.rglob("*"):
275+
if f.is_file():
276+
referenced.add(str(f.relative_to(bundle_dir)).replace("\\", "/"))
277+
255278
def _looks_like_file(self, arg: str) -> bool:
256279
"""Check if an argument looks like a file path."""
257280
# Skip flags

apps/scanner/src/mpak_scanner/scanner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
logger = logging.getLogger(__name__)
7171

7272
# Version of the scanner
73-
SCANNER_VERSION = "0.2.3"
73+
SCANNER_VERSION = "0.2.4"
7474

7575
# Domain groupings for controls (matches MTF v0.1 spec)
7676
DOMAINS = {
apps/scanner/tests/test_e2e_bundles.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""End-to-end tests against real bundles from the mpak registry.
2+
3+
These tests scan real published bundles to catch false positives and
4+
regressions that synthetic fixtures miss.
5+
6+
Setup:
7+
mpak bundle pull @nimblebraininc/finnhub -o tests/data/finnhub.mcpb
8+
mpak bundle pull @nimblebraininc/folk -o tests/data/folk.mcpb
9+
mpak bundle pull @nimblebraininc/nationalparks -o tests/data/nationalparks.mcpb
10+
11+
Run:
12+
uv run pytest tests/test_e2e_bundles.py -v
13+
uv run pytest -m e2e -v
14+
"""
15+
16+
from pathlib import Path
17+
18+
import pytest
19+
20+
from mpak_scanner import scan_bundle
21+
from mpak_scanner.models import ControlStatus, Severity
22+
23+
DATA_DIR = Path(__file__).parent / "data"
24+
25+
# Bundle paths
26+
FINNHUB = DATA_DIR / "finnhub.mcpb"
27+
FOLK = DATA_DIR / "folk.mcpb"
28+
NATIONALPARKS = DATA_DIR / "nationalparks.mcpb"
29+
30+
ALL_BUNDLES = [
31+
pytest.param(FINNHUB, id="finnhub"),
32+
pytest.param(FOLK, id="folk"),
33+
pytest.param(NATIONALPARKS, id="nationalparks"),
34+
]
35+
36+
PYTHON_BUNDLES = [
37+
pytest.param(FINNHUB, id="finnhub"),
38+
pytest.param(FOLK, id="folk"),
39+
]
40+
41+
NODE_BUNDLES = [
42+
pytest.param(NATIONALPARKS, id="nationalparks"),
43+
]
44+
45+
46+
def skip_if_missing(bundle_path: Path) -> None:
47+
if not bundle_path.exists():
48+
pytest.skip(f"Bundle not found: {bundle_path.name} (run: mpak bundle pull ... -o {bundle_path})")
49+
50+
51+
@pytest.mark.e2e
52+
class TestBundleCompleteness:
53+
"""AI-05: Real bundles should not have false-positive unexpected executables."""
54+
55+
@pytest.mark.parametrize("bundle", ALL_BUNDLES)
56+
def test_ai05_passes(self, bundle: Path) -> None:
57+
"""AI-05 should PASS on all published bundles (no false positives)."""
58+
skip_if_missing(bundle)
59+
report = scan_bundle(bundle)
60+
61+
ai05 = report.all_controls.get("AI-05")
62+
assert ai05 is not None
63+
assert ai05.status == ControlStatus.PASS, f"AI-05 false positives on {bundle.name}: " + ", ".join(
64+
f.title for f in ai05.findings if f.severity in {Severity.HIGH, Severity.CRITICAL}
65+
)
66+
67+
@pytest.mark.parametrize("bundle", ALL_BUNDLES)
68+
def test_no_high_or_critical_in_ai05(self, bundle: Path) -> None:
69+
"""AI-05 should have zero HIGH/CRITICAL findings on published bundles."""
70+
skip_if_missing(bundle)
71+
report = scan_bundle(bundle)
72+
73+
ai05 = report.all_controls.get("AI-05")
74+
assert ai05 is not None
75+
blocking = [f for f in ai05.findings if f.severity in {Severity.HIGH, Severity.CRITICAL}]
76+
assert blocking == [], f"Blocking findings on {bundle.name}: {[f.title for f in blocking]}"
77+
78+
79+
@pytest.mark.e2e
80+
class TestManifestValidation:
81+
"""AI-01: Real bundles should have valid manifests."""
82+
83+
@pytest.mark.parametrize("bundle", ALL_BUNDLES)
84+
def test_ai01_passes(self, bundle: Path) -> None:
85+
skip_if_missing(bundle)
86+
report = scan_bundle(bundle)
87+
88+
ai01 = report.all_controls.get("AI-01")
89+
assert ai01 is not None
90+
assert ai01.status == ControlStatus.PASS, f"AI-01 failed on {bundle.name}: {ai01.findings}"
91+
92+
93+
@pytest.mark.e2e
94+
class TestSafeExecution:
95+
"""CQ-05: Real bundles should pass safe execution checks."""
96+
97+
@pytest.mark.parametrize("bundle", ALL_BUNDLES)
98+
def test_cq05_passes(self, bundle: Path) -> None:
99+
skip_if_missing(bundle)
100+
report = scan_bundle(bundle)
101+
102+
cq05 = report.all_controls.get("CQ-05")
103+
assert cq05 is not None
104+
assert cq05.status == ControlStatus.PASS, f"CQ-05 failed on {bundle.name}: {cq05.findings}"
105+
106+
107+
@pytest.mark.e2e
108+
class TestFullScan:
109+
"""Full scan results for each bundle."""
110+
111+
@pytest.mark.parametrize("bundle", PYTHON_BUNDLES)
112+
def test_python_bundles_no_critical_findings(self, bundle: Path) -> None:
113+
"""Python bundles should have no CRITICAL findings across all controls."""
114+
skip_if_missing(bundle)
115+
report = scan_bundle(bundle)
116+
117+
critical = []
118+
for control_id, result in report.all_controls.items():
119+
for f in result.findings:
120+
if f.severity == Severity.CRITICAL:
121+
critical.append(f"{control_id}: {f.title}")
122+
assert critical == [], f"Critical findings on {bundle.name}: {critical}"
123+
124+
@pytest.mark.parametrize("bundle", NODE_BUNDLES)
125+
def test_node_bundles_no_critical_findings(self, bundle: Path) -> None:
126+
"""Node.js bundles should have no CRITICAL findings across all controls."""
127+
skip_if_missing(bundle)
128+
report = scan_bundle(bundle)
129+
130+
critical = []
131+
for control_id, result in report.all_controls.items():
132+
for f in result.findings:
133+
if f.severity == Severity.CRITICAL:
134+
critical.append(f"{control_id}: {f.title}")
135+
assert critical == [], f"Critical findings on {bundle.name}: {critical}"
136+
137+
@pytest.mark.parametrize("bundle", ALL_BUNDLES)
138+
def test_scan_completes_without_errors(self, bundle: Path) -> None:
139+
"""Scanner should not produce ERROR status on any control."""
140+
skip_if_missing(bundle)
141+
report = scan_bundle(bundle)
142+
143+
errors = [f"{cid}: {r.findings}" for cid, r in report.all_controls.items() if r.status == ControlStatus.ERROR]
144+
assert errors == [], f"Controls errored on {bundle.name}: {errors}"

apps/scanner/tests/test_scanner.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,93 @@ def test_python_module_flag_still_catches_unrelated_files(self, bundle_dir: Path
862862
assert result.status == ControlStatus.FAIL
863863
assert any("backdoor.py" in f.file for f in result.findings if f.file)
864864

865+
def test_node_multi_file_build_passes(self, bundle_dir: Path) -> None:
866+
"""Node.js entry point in build/ should treat sibling modules as referenced."""
867+
manifest = {
868+
"name": "@test/node-multi",
869+
"version": "1.0.0",
870+
"server": {
871+
"type": "node",
872+
"entry_point": "build/index.js",
873+
"mcp_config": {
874+
"command": "node",
875+
"args": ["${__dirname}/build/index.js", "--stdio"],
876+
},
877+
},
878+
}
879+
(bundle_dir / "manifest.json").write_text(json.dumps(manifest))
880+
(bundle_dir / "package.json").write_text('{"name": "test"}')
881+
build = bundle_dir / "build"
882+
build.mkdir()
883+
(build / "index.js").write_text("import './config.js';")
884+
(build / "config.js").write_text("export const cfg = {};")
885+
(build / "schemas.js").write_text("export const schemas = {};")
886+
handlers = build / "handlers"
887+
handlers.mkdir()
888+
(handlers / "findParks.js").write_text("export function findParks() {}")
889+
(handlers / "getAlerts.js").write_text("export function getAlerts() {}")
890+
891+
from mpak_scanner.controls.artifact_integrity import AI05BundleCompleteness
892+
893+
control = AI05BundleCompleteness()
894+
result = control.run(bundle_dir, manifest)
895+
assert result.status == ControlStatus.PASS
896+
897+
def test_node_build_with_stray_script_fails(self, bundle_dir: Path) -> None:
898+
"""Node.js build/ files are allowed but stray scripts at root should fail."""
899+
manifest = {
900+
"name": "@test/node-stray",
901+
"version": "1.0.0",
902+
"server": {
903+
"type": "node",
904+
"entry_point": "build/index.js",
905+
"mcp_config": {
906+
"command": "node",
907+
"args": ["${__dirname}/build/index.js"],
908+
},
909+
},
910+
}
911+
(bundle_dir / "manifest.json").write_text(json.dumps(manifest))
912+
build = bundle_dir / "build"
913+
build.mkdir()
914+
(build / "index.js").write_text("console.log('ok')")
915+
# Stray script outside build/
916+
(bundle_dir / "deploy.sh").write_text("#!/bin/bash\nrm -rf /")
917+
918+
from mpak_scanner.controls.artifact_integrity import AI05BundleCompleteness
919+
920+
control = AI05BundleCompleteness()
921+
result = control.run(bundle_dir, manifest)
922+
assert result.status == ControlStatus.FAIL
923+
assert any("deploy.sh" in f.file for f in result.findings if f.file)
924+
# build/index.js should NOT be flagged
925+
assert not any("build/index.js" in f.file for f in result.findings if f.file)
926+
927+
def test_node_root_entry_point_no_over_allow(self, bundle_dir: Path) -> None:
928+
"""Node.js entry point at root should not whitelist all files."""
929+
manifest = {
930+
"name": "@test/node-root",
931+
"version": "1.0.0",
932+
"server": {
933+
"type": "node",
934+
"entry_point": "index.js",
935+
"mcp_config": {
936+
"command": "node",
937+
"args": ["${__dirname}/index.js"],
938+
},
939+
},
940+
}
941+
(bundle_dir / "manifest.json").write_text(json.dumps(manifest))
942+
(bundle_dir / "index.js").write_text("console.log('ok')")
943+
(bundle_dir / "backdoor.js").write_text("require('child_process').exec('evil')")
944+
945+
from mpak_scanner.controls.artifact_integrity import AI05BundleCompleteness
946+
947+
control = AI05BundleCompleteness()
948+
result = control.run(bundle_dir, manifest)
949+
assert result.status == ControlStatus.FAIL
950+
assert any("backdoor.js" in f.file for f in result.findings if f.file)
951+
865952

866953
# =============================================================================
867954
# Fixture-based Integration Tests

apps/scanner/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)