Skip to content

Commit 7059af6

Browse files
committed
feat(engine): add scanner DB cache volume mounts for container runs
Persist vulnerability databases (Trivy, Grype, ClamAV, OpenGrep) between container runs by mounting host-side cache dirs into containers. Saves ~500MB of re-downloads per full scan after first run.

- Add CACHE_MOUNTS mapping and get_cache_mount() to containers.py
- Default cache in $TMPDIR/argus-cache (cleaned on reboot, non-intrusive)
- Override with ARGUS_CACHE_DIR env var for persistent caching
- Add --no-cache flag to argus scan to opt out
- Add argus cache info|clean subcommand for visibility
- Update shell completions with new flags and cache command
- Fix Codecov upload: use explicit file path, non-blocking errors
- 29 new tests across containers, CLI, and engine
1 parent 0350c3f commit 7059af6

11 files changed

Lines changed: 330 additions & 11 deletions

File tree

.ai/architecture.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ components:
2727
type: sdk
2828
cli: "python -m argus scan --config argus.yml"
2929
structure:
30-
"cli.py": "argparse CLI with scan and report subcommands"
30+
"cli.py": "argparse CLI with scan, cache, and report subcommands"
3131
"init.py": "argus init project bootstrap and banner rendering from img/argus_logo.txt"
32+
"containers.py": "Container image manifest, version enforcement, and DB cache volume mounts (CACHE_MOUNTS)"
3233
"core/models.py": "Severity enum, Finding/ScanResult/ScanSummary dataclasses"
3334
"core/scanner.py": "Scanner Protocol definition"
3435
"core/config.py": "ArgusConfig loading from argus.yml"

.ai/workflows.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,9 @@ quick_reference:
363363
test_with_coverage: "pytest --cov"
364364
scan_verbose: "argus scan --verbose --severity-threshold none"
365365
scan_no_spinner: "argus scan --no-spinner --severity-threshold none"
366+
scan_no_cache: "argus scan --no-cache --severity-threshold none"
367+
cache_info: "argus cache info"
368+
cache_clean: "argus cache clean"
366369
lint: "npm run lint"
367370
release: "npm run release"
368371
format: "npm run format"

.github/workflows/test-unit.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,10 @@ jobs:
8787
if: always()
8888
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6
8989
with:
90-
directory: ./coverage
90+
files: ./coverage/python.xml
9191
flags: unittests
9292
name: codecov-umbrella
93-
fail_ci_if_error: true
93+
fail_ci_if_error: false
9494
verbose: true
9595
token: ${{ secrets.CODECOV_TOKEN }}
9696

argus/cli.py

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Argus CLI — command-line interface for security scanning."""
22

33
import argparse
4+
import os
45
import sys
56
import threading
67
import time
@@ -147,6 +148,7 @@ def build_parser() -> argparse.ArgumentParser:
147148
_build_validate_parser(subparsers)
148149
_build_mcp_parser(subparsers)
149150
_build_completion_parser(subparsers)
151+
_build_cache_parser(subparsers)
150152

151153
return parser
152154

@@ -307,6 +309,12 @@ def _build_scan_parser(subparsers: argparse._SubParsersAction) -> None:
307309
help="Allow local tool versions that differ from argus-pinned versions. "
308310
"Use in airgapped environments where tool updates are constrained.",
309311
)
312+
scan_parser.add_argument(
313+
"--no-cache",
314+
action="store_true",
315+
help="Disable DB cache volume mounts. Forces scanners to re-download "
316+
"vulnerability databases on every container run.",
317+
)
310318

311319
# Container-specific flags (used with: argus scan container)
312320
container_group = scan_parser.add_argument_group(
@@ -633,6 +641,33 @@ def _build_completion_parser(subparsers: argparse._SubParsersAction) -> None:
633641
)
634642

635643

644+
def _build_cache_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``cache`` subcommand and its ``info`` / ``clean`` actions.

    The selected action is stored on the parsed namespace as
    ``cache_action``.
    """
    description = "".join(
        [
            "Manage cached vulnerability databases used by container-based scanners.\n\n",
            "Argus caches scanner databases (Trivy, Grype, ClamAV, etc.) in the system\n",
            "temp directory so container runs don't re-download hundreds of MB each time.\n",
            "The cache persists across runs within a session but is cleaned on reboot.\n\n",
            "Cache location: $TMPDIR/argus-cache (override with ARGUS_CACHE_DIR)\n",
            "For persistent caching: export ARGUS_CACHE_DIR=~/.argus/cache",
        ]
    )
    cache_parser = subparsers.add_parser(
        "cache",
        help="Manage scanner database caches",
        description=description,
        # Raw formatter keeps the hand-wrapped line breaks in the description.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    actions = cache_parser.add_subparsers(dest="cache_action")
    for action_name, action_help in (
        ("info", "Show cache location and size per scanner"),
        ("clean", "Remove all cached scanner databases"),
    ):
        actions.add_parser(action_name, help=action_help)
669+
670+
636671
def _build_report_parser(subparsers: argparse._SubParsersAction) -> None:
637672
"""Add the 'report' subcommand."""
638673
report_parser = subparsers.add_parser(
@@ -828,6 +863,7 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
828863
exclude=getattr(args, "exclude", ""),
829864
parallel=not getattr(args, "no_parallel", False),
830865
allow_local_versions=getattr(args, "allow_local_versions", False),
866+
no_cache=getattr(args, "no_cache", False),
831867
)
832868
log.info(
833869
"Scan complete: %d scanner(s), %d finding(s)",
@@ -1211,6 +1247,53 @@ def cmd_completion(args: argparse.Namespace) -> int:
12111247
return EXIT_SUCCESS
12121248

12131249

1250+
def cmd_cache(args: argparse.Namespace) -> int:
    """Show or clear the scanner database cache.

    ``args.cache_action`` selects the operation: ``"clean"`` removes the
    whole cache root directory; any other value (including a missing
    action) prints the cache location and the size cached per scanner.

    Returns:
        ``EXIT_SUCCESS`` for both operations.
    """
    from argus.containers import CACHE_MOUNTS, _default_cache_root

    cache_root = _default_cache_root()
    action = getattr(args, "cache_action", None)

    if action == "clean":
        if cache_root.exists():
            import shutil

            shutil.rmtree(cache_root)
            print(f"Removed cache directory: {cache_root}")
        else:
            print("No cache directory found.")
        return EXIT_SUCCESS

    # Default: info
    print(f"Cache directory: {cache_root}")
    if os.environ.get("ARGUS_CACHE_DIR"):
        print(" (set by ARGUS_CACHE_DIR)")
    print()

    total_size = 0
    for scanner_key in sorted(CACHE_MOUNTS):
        scanner_dir = cache_root / scanner_key
        if scanner_dir.exists():
            size = _cache_dir_size(scanner_dir)
            total_size += size
            print(f" {scanner_key:<15} {_format_size(size)}")
        else:
            print(f" {scanner_key:<15} (not cached)")

    print(f"\n {'Total':<15} {_format_size(total_size)}")
    return EXIT_SUCCESS


def _cache_dir_size(directory) -> int:
    """Return the combined size in bytes of the regular files under *directory*.

    *directory* is a ``pathlib.Path``. Entries that disappear or cannot be
    stat'ed while the tree is being walked are skipped, so a cache that is
    being rewritten at the same time does not abort the report.
    """
    total = 0
    for entry in directory.rglob("*"):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # File vanished or became unreadable between listing and stat.
            continue
    return total
1284+
1285+
1286+
def _format_size(size_bytes: int) -> str:
    """Render a byte count with a B, KB, MB or GB suffix (1024-based).

    Counts below 1024 are printed as whole bytes. Larger counts are divided
    down to KB, MB or GB and printed with one decimal place; everything from
    1024**3 upward is reported in GB.
    """
    step = 1024
    if size_bytes < step:
        return f"{size_bytes} B"

    for unit, divisor in (("KB", step), ("MB", step * step)):
        # A unit applies while the value is below the next unit's threshold.
        if size_bytes < divisor * step:
            return f"{size_bytes / divisor:.1f} {unit}"

    return f"{size_bytes / (step * step * step):.1f} GB"
1295+
1296+
12141297
def cmd_mcp(args: argparse.Namespace) -> int:
12151298
"""Start the MCP server for AI assistant integration."""
12161299
try:
@@ -1245,6 +1328,7 @@ def _generate_zsh_completion(scanners: str) -> str:
12451328
'validate:Validate an argus.yml configuration file'
12461329
'mcp:Start the MCP server for AI assistant integration'
12471330
'completion:Generate shell completion script'
1331+
'cache:Manage scanner database caches'
12481332
)
12491333
12501334
scanners=({scanners})
@@ -1280,6 +1364,9 @@ def _generate_zsh_completion(scanners: str) -> str:
12801364
'--no-timestamp[Flat output directory]'
12811365
'--fail-fast[Abort on first failure]'
12821366
'--timeout[Per-scanner timeout]:seconds:'
1367+
'--no-parallel[Run scanners sequentially]'
1368+
'--allow-local-versions[Skip version enforcement]'
1369+
'--no-cache[Disable DB cache volume mounts]'
12831370
)
12841371
12851372
scan_container=(
@@ -1351,7 +1438,7 @@ def _generate_bash_completion(scanners: str) -> str:
13511438
cur="${{COMP_WORDS[COMP_CWORD]}}"
13521439
prev="${{COMP_WORDS[COMP_CWORD-1]}}"
13531440
1354-
commands="init scan classify collect report validate mcp completion"
1441+
commands="init scan classify collect report validate mcp completion cache"
13551442
scanners="{scanners}"
13561443
severity="critical high medium low none"
13571444
formats="terminal markdown sarif json"
@@ -1375,7 +1462,7 @@ def _generate_bash_completion(scanners: str) -> str:
13751462
--scan-type) COMPREPLY=($(compgen -W "baseline full" -- "$cur")); return ;;
13761463
--path|-p|--output-dir|-o|--config|-c|--output-vars) COMPREPLY=($(compgen -d -- "$cur")); return ;;
13771464
esac
1378-
COMPREPLY=($(compgen -W "--path --config --output-dir --severity-threshold --format --output-vars --list --verbose --no-spinner --no-timestamp --fail-fast --timeout" -- "$cur"))
1465+
COMPREPLY=($(compgen -W "--path --config --output-dir --severity-threshold --format --output-vars --list --verbose --no-spinner --no-timestamp --fail-fast --timeout --no-cache --no-parallel --allow-local-versions" -- "$cur"))
13791466
;;
13801467
report)
13811468
if [ "$COMP_CWORD" -eq 2 ]; then
@@ -1769,6 +1856,7 @@ def main(argv: list[str] | None = None) -> None:
17691856
"validate": cmd_validate,
17701857
"mcp": cmd_mcp,
17711858
"completion": cmd_completion,
1859+
"cache": cmd_cache,
17721860
}
17731861

17741862
handler = handlers.get(args.command)

argus/containers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
1. Single-point version updates
55
2. Dependabot/Renovate tracking
66
3. Registry override support
7+
4. DB cache volume mounts for persistent vulnerability databases
78
"""
89

10+
import os
11+
from pathlib import Path
12+
913
# Official images from tool authors (used directly, not rebuilt by argus)
1014
OFFICIAL_IMAGES = {
1115
"trivy": "aquasec/trivy:0.69.3",
@@ -69,3 +73,46 @@ def get_expected_version(scanner_name: str) -> str | None:
6973
"""
7074
image = get_image(scanner_name)
7175
return expected_version(image)
76+
77+
78+
# Scanner → container cache path mappings.
79+
# Keys are resolved via _ALIASES (same as get_image), values are the
80+
# absolute path inside the container where the tool stores its DB/cache.
81+
CACHE_MOUNTS: dict[str, str] = {
82+
"trivy": "/root/.cache/trivy",
83+
"grype": "/root/.cache/grype",
84+
"clamav": "/var/lib/clamav",
85+
"semgrep": "/root/.semgrep",
86+
"checkov": "/root/.checkov",
87+
}
88+
89+
90+
def _default_cache_root() -> Path:
    """Return the host-side cache root directory.

    Uses the ``ARGUS_CACHE_DIR`` env var if it is set and non-empty,
    otherwise ``argus-cache`` inside the system temp directory
    (``tempfile.gettempdir()``).

    A leading ``~`` in ``ARGUS_CACHE_DIR`` is expanded and a relative value
    is anchored at the current working directory. The path ends up as the
    host side of a container ``-v host:container`` mount, which needs an
    absolute host path to be treated as a bind mount.

    NOTE(review): how long the temp-dir default survives (e.g. whether it is
    wiped on reboot) depends on the host OS temp-cleaning policy; nothing in
    this module enforces it.
    """
    env = os.environ.get("ARGUS_CACHE_DIR")
    if env:
        # absolute() (not resolve()) keeps symlinked locations as the user
        # wrote them while still guaranteeing an absolute path.
        return Path(env).expanduser().absolute()
    import tempfile

    return Path(tempfile.gettempdir()) / "argus-cache"
103+
104+
105+
def get_cache_mount(scanner_name: str) -> tuple[Path, str] | None:
    """Look up the DB cache mount for *scanner_name*.

    The name is mapped through ``_ALIASES`` (falling back to the name
    itself) and the result is looked up in ``CACHE_MOUNTS``.

    Returns:
        ``(host_path, container_path)`` when the scanner has a cache entry,
        otherwise ``None``. The host directory, including missing parents,
        is created before returning.
    """
    resolved = _ALIASES.get(scanner_name, scanner_name)
    mount_point = CACHE_MOUNTS.get(resolved)
    if mount_point is not None:
        host_path = _default_cache_root() / resolved
        host_path.mkdir(parents=True, exist_ok=True)
        return host_path, mount_point
    return None

argus/core/engine.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def __init__(self, config: ArgusConfig):
2222
self.config = config
2323
self._scanners: dict[str, Scanner] = {}
2424
self._allow_local_versions: bool = False
25+
self._no_cache: bool = False
2526

2627
def register_scanner(self, scanner: Scanner) -> None:
2728
"""Register a scanner instance for use by the engine."""
@@ -42,6 +43,7 @@ def run(
4243
exclude: str = "",
4344
parallel: bool = True,
4445
allow_local_versions: bool = False,
46+
no_cache: bool = False,
4547
) -> ScanSummary:
4648
"""Run scanners and return an aggregated ScanSummary.
4749
@@ -53,10 +55,12 @@ def run(
5355
exclude: comma-separated CLI exclusion patterns
5456
parallel: run scanners concurrently (default True)
5557
allow_local_versions: skip version enforcement for local tools
58+
no_cache: disable DB cache volume mounts for containers
5659
"""
5760
from .exclusions import build_exclusion_set, log_exclusion_set
5861

5962
self._allow_local_versions = allow_local_versions
63+
self._no_cache = no_cache
6064

6165
names_to_run = self._resolve_scanner_names(scanner_names)
6266
logger.debug(
@@ -518,6 +522,17 @@ def _run_in_container(
518522
"-v", f"{output_dir}:/output",
519523
]
520524

525+
# Mount host-side DB cache to persist vulnerability databases
526+
if not self._no_cache:
527+
from ..containers import get_cache_mount
528+
cache = get_cache_mount(scanner.name)
529+
if cache:
530+
host_dir, container_dir = cache
531+
docker_cmd.extend(["-v", f"{host_dir}:{container_dir}"])
532+
logger.debug(
533+
"DB cache mount: %s → %s", host_dir, container_dir,
534+
)
535+
521536
entrypoint = getattr(scanner, "container_entrypoint", None)
522537
if entrypoint:
523538
docker_cmd.extend(["--entrypoint", entrypoint])

argus/tests/test_cli.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,3 +724,53 @@ def test_list_shows_availability(self, monkeypatch, capsys):
724724
captured = capsys.readouterr()
725725
assert "local" in captured.out
726726
assert "not found" in captured.out
727+
728+
729+
class TestCacheSubcommand:
    """Test parsing and execution of the 'cache' subcommand."""

    def test_cache_command_parses(self):
        parser = build_parser()
        args = parser.parse_args(["cache", "info"])
        assert args.command == "cache"
        assert args.cache_action == "info"

    def test_cache_clean_parses(self):
        parser = build_parser()
        args = parser.parse_args(["cache", "clean"])
        assert args.command == "cache"
        assert args.cache_action == "clean"

    def test_cache_no_action_defaults_to_none(self):
        parser = build_parser()
        args = parser.parse_args(["cache"])
        assert args.command == "cache"
        assert args.cache_action is None

    def test_no_cache_flag_on_scan(self):
        parser = build_parser()
        args = parser.parse_args(["scan", "--no-cache"])
        assert args.no_cache is True

    def test_no_cache_flag_default(self):
        parser = build_parser()
        args = parser.parse_args(["scan"])
        assert args.no_cache is False

    def test_cache_info_runs(self, tmp_path, monkeypatch, capsys):
        from argus.cli import cmd_cache

        monkeypatch.setenv("ARGUS_CACHE_DIR", str(tmp_path))
        trivy_dir = tmp_path / "trivy"
        trivy_dir.mkdir()
        (trivy_dir / "db.tar.gz").write_text("fake")  # 4 bytes on disk

        args = build_parser().parse_args(["cache", "info"])
        result = cmd_cache(args)

        out = capsys.readouterr().out
        assert result == EXIT_SUCCESS
        assert f"Cache directory: {tmp_path}" in out
        assert "(set by ARGUS_CACHE_DIR)" in out
        assert "4 B" in out
        assert "(not cached)" in out
        # 'info' must never modify the cache.
        assert (trivy_dir / "db.tar.gz").exists()

    def test_cache_clean_runs(self, tmp_path, monkeypatch, capsys):
        from argus.cli import cmd_cache

        # Use a nested directory so the test does not delete the pytest
        # fixture root itself.
        cache_root = tmp_path / "argus-cache"
        monkeypatch.setenv("ARGUS_CACHE_DIR", str(cache_root))
        cache_dir = cache_root / "trivy"
        cache_dir.mkdir(parents=True)
        (cache_dir / "db.tar.gz").write_text("fake")

        args = build_parser().parse_args(["cache", "clean"])
        result = cmd_cache(args)

        assert result == EXIT_SUCCESS
        assert not cache_root.exists()
        assert tmp_path.exists()
        assert "Removed cache directory" in capsys.readouterr().out

    def test_cache_clean_without_cache_dir(self, tmp_path, monkeypatch, capsys):
        from argus.cli import cmd_cache

        monkeypatch.setenv("ARGUS_CACHE_DIR", str(tmp_path / "missing"))

        args = build_parser().parse_args(["cache", "clean"])
        result = cmd_cache(args)

        assert result == EXIT_SUCCESS
        assert "No cache directory found." in capsys.readouterr().out

0 commit comments

Comments
 (0)