promptfoo · mldangelo · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- **security:** recurse into object-dtype `.npy` payloads and `.npz` object members with the pickle scanner while preserving CVE-2019-6446 warnings and archive-member context
 - **security:** harden TensorFlow weight extraction limits to bound actual tensor payload materialization, including malformed `tensor_content` and string-backed tensors, and continue scanning past oversized `Const` nodes
 - **security:** stream TAR members to temp files under size limits instead of buffering whole entries in memory during scan
 - **security:** inspect TensorFlow SavedModel function definitions when scanning for dangerous ops and protobuf string abuse, with function-aware finding locations

diff --git a/modelaudit/scanners/numpy_scanner.py b/modelaudit/scanners/numpy_scanner.py
@@ -4,9 +4,10 @@
 
 import sys
 import warnings
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, BinaryIO, ClassVar
 
 from .base import BaseScanner, IssueSeverity, ScanResult
+from .pickle_scanner import PickleScanner
 
 # Import NumPy with compatibility handling
 try:
@@ -88,6 +89,17 @@ def _validate_array_dimensions(self, shape: tuple[int, ...]) -> None:
     CVE_2019_6446_CVSS = 9.8
     CVE_2019_6446_CWE = "CWE-502"
 
+    def _scan_embedded_pickle_payload(
+        self,
+        file_obj: BinaryIO,  # binary stream passed straight through to the pickle scanner
+        payload_size: int,  # size argument forwarded unchanged to ``_scan_pickle_bytes``
+        context_path: str,  # path the nested scanner records as its current file
+    ) -> ScanResult:
+        """Scan an object-dtype NumPy payload with a new PickleScanner built from this scanner's config."""
+        pickle_scanner = PickleScanner(config=self.config)
+        pickle_scanner.current_file_path = context_path  # presumably drives finding locations - confirm
+        return pickle_scanner._scan_pickle_bytes(file_obj, payload_size)  # NOTE(review): relies on a private method
+
     def _validate_dtype(self, dtype: Any) -> None:
         """Validate numpy dtype for security"""
         # Check for problematic data types
@@ -299,6 +311,15 @@ def scan(self, path: str) -> ScanResult:
                                 ),
                             )
 
+                            f.seek(data_offset)
+                            embedded_result = self._scan_embedded_pickle_payload(
+                                f,
+                                file_size - data_offset,
+                                path,
+                            )
+                            result.issues.extend(embedded_result.issues)
+                            result.checks.extend(embedded_result.checks)
+
                         self._validate_dtype(dtype)
                         result.add_check(
                             name="Data Type Safety Check",

diff --git a/tests/scanners/test_numpy_scanner.py b/tests/scanners/test_numpy_scanner.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import numpy as np
 
 from modelaudit.scanners.base import IssueSeverity
@@ -100,3 +102,120 @@ def test_structured_with_object_field_triggers_cve(self, tmp_path):
 
         cve_checks = [c for c in result.checks if "CVE-2019-6446" in (c.name + c.message)]
         assert len(cve_checks) > 0, "Structured dtype with object field should trigger CVE"
+
+
+class _ExecPayload:  # test fixture: an object whose pickle form is a call to builtin ``exec``
+    def __reduce__(self):  # consulted by pickle when the object array is serialized
+        return (exec, ("print('owned')",))  # (callable, args) pair; unpickling would invoke it
+
+
+class _SSLPayload:  # test fixture: an object whose pickle form is a call to ``ssl.get_server_certificate``
+    def __reduce__(self):  # consulted by pickle when the object array is serialized
+        import ssl  # function-scope import: runs only when ``__reduce__`` is called
+
+        return (ssl.get_server_certificate, (("example.com", 443),))  # (callable, args) pair for pickle
+
+
+def _failed_checks(result):
+    return list(filter(lambda check: check.status.value == "failed", result.checks))
+
+
+def test_object_dtype_numpy_recurses_into_pickle_exec(tmp_path: Path) -> None:
+    """An exec-reducing object array must fail both the CVE check and an exec-related check."""
+    npy_file = tmp_path / "malicious_object.npy"
+    np.save(npy_file, np.array([_ExecPayload()], dtype=object), allow_pickle=True)
+
+    failures = _failed_checks(NumPyScanner().scan(str(npy_file)))
+
+    cve_hits = ("CVE-2019-6446" in (check.name + check.message) for check in failures)
+    exec_hits = ("exec" in check.message.lower() for check in failures)
+    assert any(cve_hits)
+    assert any(exec_hits)
+
+
+def test_object_dtype_numpy_recurses_into_pickle_ssl(tmp_path: Path) -> None:
+    """An ssl-reducing object array must fail the CVE check and name the ssl callable."""
+    npy_file = tmp_path / "malicious_ssl_object.npy"
+    np.save(npy_file, np.array([_SSLPayload()], dtype=object), allow_pickle=True)
+
+    failures = _failed_checks(NumPyScanner().scan(str(npy_file)))
+
+    cve_hits = ("CVE-2019-6446" in (check.name + check.message) for check in failures)
+    ssl_hits = ("ssl.get_server_certificate" in check.message for check in failures)
+    assert any(cve_hits)
+    assert any(ssl_hits)
+
+
+def test_numeric_npz_has_no_pickle_recursion_findings(tmp_path: Path) -> None:
+    """A purely numeric archive must produce neither CVE nor exec-related checks."""
+    from modelaudit.scanners.zip_scanner import ZipScanner
+
+    archive = tmp_path / "numeric_only.npz"
+    np.savez(archive, a=np.arange(4), b=np.ones((2, 2), dtype=np.float32))
+    checks = ZipScanner().scan(str(archive)).checks
+
+    assert all("CVE-2019-6446" not in (check.name + check.message) for check in checks)
+    assert all("exec" not in check.message.lower() for check in checks)
+
+
+def test_object_npz_member_recurses_into_pickle_exec_with_member_context(tmp_path: Path) -> None:
+    """Findings for the malicious ``payload`` member must carry the ``payload.npy`` entry name."""
+    from modelaudit.scanners.zip_scanner import ZipScanner
+
+    archive = tmp_path / "mixed_object.npz"
+    np.savez(archive, safe=np.array([1, 2, 3], dtype=np.int64), payload=np.array([_ExecPayload()], dtype=object))
+    report = ZipScanner().scan(str(archive))
+
+    def on_payload_member(check) -> bool:
+        return "CVE-2019-6446" in (check.name + check.message) and "payload.npy" in str(check.location)
+
+    assert any(map(on_payload_member, _failed_checks(report)))
+    assert any("exec" in i.message.lower() and i.details.get("zip_entry") == "payload.npy" for i in report.issues)
+
+
+def test_benign_object_dtype_numpy_no_nested_critical(tmp_path: Path) -> None:
+    """Harmless pickled objects keep the CVE check but add no other CRITICAL issue."""
+    npy_file = tmp_path / "benign_object.npy"
+    np.save(npy_file, np.array([{"k": "v"}, [1, 2, 3]], dtype=object), allow_pickle=True)
+
+    report = NumPyScanner().scan(str(npy_file))
+
+    assert any("CVE-2019-6446" in (check.name + check.message) for check in report.checks)
+    non_cve_issues = (issue for issue in report.issues if "CVE-2019-6446" not in issue.message)
+    assert all(issue.severity != IssueSeverity.CRITICAL for issue in non_cve_issues)
+
+
+def test_benign_object_dtype_npz_no_nested_critical(tmp_path: Path) -> None:
+    """A harmless object member keeps the CVE check but raises no CRITICAL issue at all."""
+    from modelaudit.scanners.zip_scanner import ZipScanner
+
+    archive = tmp_path / "benign_object.npz"
+    np.savez(archive, safe=np.array([{"x": 1}], dtype=object))
+    report = ZipScanner().scan(str(archive))
+
+    assert any("CVE-2019-6446" in (check.name + check.message) for check in report.checks)
+    assert all(issue.severity != IssueSeverity.CRITICAL for issue in report.issues)
+
+
+def test_truncated_npy_fails_safely(tmp_path: Path) -> None:
+    """Scanning a file with its last 8 bytes removed must still report an INFO issue."""
+    npy_file = tmp_path / "truncated.npy"
+    np.save(npy_file, np.array([_ExecPayload()], dtype=object), allow_pickle=True)
+    intact = npy_file.read_bytes()
+    npy_file.write_bytes(intact[:-8])
+
+    report = NumPyScanner().scan(str(npy_file))
+    severities = (issue.severity for issue in report.issues)
+    assert any(level == IssueSeverity.INFO for level in severities)
+
+
+def test_corrupted_npz_fails_safely(tmp_path: Path) -> None:
+    """Non-ZIP bytes behind an ``.npz`` name must give an unsuccessful scan with an INFO issue."""
+    from modelaudit.scanners.zip_scanner import ZipScanner
+
+    bogus_archive = tmp_path / "corrupt.npz"
+    bogus_archive.write_bytes(b"not-a-zip")
+    report = ZipScanner().scan(str(bogus_archive))
+
+    assert report.success is False
+    assert any(issue.severity == IssueSeverity.INFO for issue in report.issues)
diff --git a/tests/scanners/test_zip_scanner.py b/tests/scanners/test_zip_scanner.py
@@ -314,6 +314,23 @@ def test_scan_zip_with_prefixed_proto0_pickle_disguised_as_text(self, tmp_path:
             f"Expected critical os/posix.system issue, got: {critical_messages}"
         )
 
+    def test_scan_npz_with_object_member_recurses_into_pickle(self, tmp_path: Path) -> None:
+        """An object-dtype member must yield the CVE check and an exec issue tagged ``payload.npy``."""
+        import numpy as np
+
+        class _ExecPayload:
+            def __reduce__(self):
+                return (exec, ("print('owned')",))
+
+        npz_file = tmp_path / "payload.npz"
+        np.savez(npz_file, safe=np.arange(3), payload=np.array([_ExecPayload()], dtype=object))
+        report = self.scanner.scan(str(npz_file))
+        assert report.success is True
+
+        failures = list(filter(lambda check: check.status.value == "failed", report.checks))
+        assert any("cve-2019-6446" in (check.name + check.message).lower() for check in failures)
+        assert any("exec" in i.message.lower() and i.details.get("zip_entry") == "payload.npy" for i in report.issues)
+
     def test_scan_zip_with_plain_text_global_prefix_not_treated_as_pickle(self, tmp_path: Path) -> None:
         """Plain text entries that start with GLOBAL-like bytes should not trigger pickle parse warnings."""
         archive_path = tmp_path / "plain_text_payload.zip"