1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -123,6 +123,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- **security:** flag risky import-only pickle references for `torch.jit`, `torch._dynamo`, `torch._inductor`, `torch.compile`, `torch.storage._load_from_bytes`, `numpy.f2py`, and `numpy.distutils` while preserving safe state-dict reconstruction paths
- **security:** add low-severity pickle structural tamper findings for duplicate or misplaced `PROTO` opcodes while avoiding benign binary-tail false positives
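The duplicate-`PROTO` structural check described above can be illustrated with the standard library's `pickletools`. This is an independent sketch of the detection idea, not the scanner's actual implementation:

```python
import pickle
import pickletools

# A well-formed protocol-2 pickle starts with exactly one PROTO opcode at offset 0.
clean = pickle.dumps([1, 2, 3], protocol=2)

# Simulate tampering: splice a second PROTO opcode (0x80 0x02) mid-stream.
tampered = clean[:2] + b"\x80\x02" + clean[2:]

def proto_offsets(data: bytes) -> list[int]:
    """Return the byte offsets of every PROTO opcode in the stream."""
    return [pos for op, _arg, pos in pickletools.genops(data) if op.name == "PROTO"]

print(proto_offsets(clean))     # a single leading PROTO
print(proto_offsets(tampered))  # a duplicate, misplaced PROTO: structural-tamper signal
```

A scanner flagging this pattern would treat any `PROTO` beyond offset 0, or more than one `PROTO` total, as a low-severity structural finding.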
Comment on lines 124 to 127
Contributor
⚠️ Potential issue | 🟡 Minor

Duplicate ### Fixed section violates Keep a Changelog format.

There are two ### Fixed sections under [Unreleased] (lines 82 and 124). The Keep a Changelog format requires each section type to appear only once per version. Merge this section's entries into the existing ### Fixed section at line 82.

🧰 Tools
🪛 markdownlint-cli2 (0.21.0)

[warning] 124-124: Multiple headings with the same content

(MD024, no-duplicate-heading)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@CHANGELOG.md` around lines 124-127, the duplicate "### Fixed" header and
its two bullet entries should be merged into the existing "### Fixed" section
(the other "### Fixed" under [Unreleased]); move the two bullets about security
(the torch/jit/numpy import-only pickle references entry and the pickle
structural tamper findings entry) into the existing "### Fixed" list and then
remove this duplicate "### Fixed" header and its empty block so each section
type appears only once under [Unreleased]; preserve the exact bullet text and
ordering when appending to the original "### Fixed" section.

- **security:** scan OCI layer members based on registered file extensions so embedded ONNX, Keras H5, and other real-path scanners are no longer skipped inside tar layers
- **security:** resolve bare-module TorchServe handler references like `custom_handler` to concrete archive members so malicious handler source is no longer skipped by static analysis
38 changes: 36 additions & 2 deletions modelaudit/config/explanations.py
@@ -99,6 +99,34 @@
"The 'dill' module extends pickle's capabilities to serialize almost any Python object, including lambda "
"functions and code objects. This significantly increases the attack surface for code execution."
),
"torch.jit": (
"The 'torch.jit' module can load and execute serialized TorchScript artifacts. In untrusted model files, "
"this can introduce unsafe runtime behavior and should be treated as a high-risk import surface."
),
"torch._dynamo": (
"The 'torch._dynamo' internals drive dynamic graph capture and compilation. Importing these internals from "
"untrusted pickle payloads is suspicious because they enable advanced runtime execution pathways."
),
"torch._inductor": (
"The 'torch._inductor' compiler backend can generate and execute optimized kernels at runtime. In model "
"artifacts, this is a risky import surface that should be reviewed as potentially unsafe."
),
"torch.compile": (
"The 'torch.compile' API triggers runtime compilation and execution pipelines. In untrusted serialized "
"payloads, this can be used to reach risky execution paths and should be flagged."
),
"torch.storage._load_from_bytes": (
"The 'torch.storage._load_from_bytes' function reconstructs storages from raw bytes and can be abused in "
"malicious pickle chains. References from untrusted payloads should be treated as dangerous."
),
"numpy.f2py": (
"The 'numpy.f2py' toolchain bridges Python and compiled Fortran extensions. References in untrusted "
"pickles are risky because they can touch native-code compilation/loading paths."
),
"numpy.distutils": (
"The 'numpy.distutils' build utilities are tied to extension module compilation and setup workflows. "
"Importing them from serialized model payloads is suspicious and may indicate unsafe behavior."
),
"numpy.load": (
"The 'numpy.load' function can recursively deserialize object arrays via pickle support, enabling "
"second-stage payload loading from attacker-controlled files."
@@ -428,8 +456,14 @@ def get_import_explanation(module_name: str) -> str | None:
"""Get explanation for a dangerous import/module."""
if module_name in DANGEROUS_IMPORTS:
return get_explanation("import", module_name)
# Handle module.function format (e.g., "os.system")
base_module = module_name.split(".")[0]

parts = module_name.split(".")
for i in range(len(parts) - 1, 0, -1):
parent = ".".join(parts[:i])
if parent in DANGEROUS_IMPORTS:
return get_explanation("import", parent)

base_module = parts[0]
return get_explanation("import", base_module)
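The new lookup walks dotted parents from most to least specific before falling back to the base module, so a reference like `torch.jit.load` resolves to the `torch.jit` explanation rather than the generic `torch` one. A self-contained sketch of the same resolution order (the table contents here are illustrative stand-ins, not the real `DANGEROUS_IMPORTS`):

```python
# Illustrative stand-in for the real DANGEROUS_IMPORTS explanation table.
DANGEROUS_IMPORTS = {"torch.jit": "jit explanation", "os": "os explanation"}

def resolve_explanation_key(module_name: str) -> str:
    """Return the most specific dotted parent present in the table,
    falling back to the base module (mirrors get_import_explanation)."""
    if module_name in DANGEROUS_IMPORTS:
        return module_name
    parts = module_name.split(".")
    for i in range(len(parts) - 1, 0, -1):  # longest parent first
        parent = ".".join(parts[:i])
        if parent in DANGEROUS_IMPORTS:
            return parent
    return parts[0]

print(resolve_explanation_key("torch.jit.load"))  # matches the "torch.jit" entry
print(resolve_explanation_key("os.system"))       # falls through to base "os"
```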


58 changes: 57 additions & 1 deletion modelaudit/scanners/pickle_scanner.py
@@ -655,6 +655,31 @@ def _compute_pickle_length(path: str) -> int:
"glob",
}

# Risky ML-specific import surfaces that must be flagged even when they appear
# as import-only GLOBAL/STACK_GLOBAL references (without immediate REDUCE).
RISKY_ML_MODULE_PREFIXES: tuple[str, ...] = (
"torch.jit",
"torch._dynamo",
"torch._inductor",
"numpy.f2py",
"numpy.distutils",
)

RISKY_ML_EXACT_REFS: set[tuple[str, str]] = {
("torch", "compile"),
("torch.storage", "_load_from_bytes"),
}


def _split_parent_child_ref(prefix: str) -> tuple[str, str]:
parent, _separator, child = prefix.rpartition(".")
return parent, child


RISKY_ML_PARENT_CHILD_REFS: frozenset[tuple[str, str]] = frozenset(
_split_parent_child_ref(prefix) for prefix in RISKY_ML_MODULE_PREFIXES
)
Comment on lines +658 to +681
Contributor
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

In CPython 3.10-3.13, does pickle.Unpickler.find_class() resolve dotted name values (for example OrderedDict.fromkeys) by traversing attributes after importing the module?

💡 Result:

Yes—for the C-accelerated pickle.Unpickler used by CPython 3.10–3.13, find_class() imports the module and then (for pickle protocols >= 4) resolves a dotted name by splitting on '.' and repeatedly looking up attributes (i.e., module → OrderedDict → fromkeys). (raw.githubusercontent.com)

For protocols < 4, it does not do dotted traversal; it only looks up a single attribute named exactly name. (raw.githubusercontent.com)
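This traversal can be demonstrated with a hand-assembled protocol-4 stream; the behavior below is guarded to the 3.10–3.13 range confirmed by the query, since later versions may harden `find_class`:

```python
import pickle
import sys

# Hand-assembled protocol-4 pickle: STACK_GLOBAL with a dotted name argument.
payload = (
    b"\x80\x04"                      # PROTO 4
    b"\x8c\x0bcollections"           # SHORT_BINUNICODE "collections" (11 bytes)
    b"\x8c\x14OrderedDict.fromkeys"  # SHORT_BINUNICODE "OrderedDict.fromkeys" (20 bytes)
    b"\x93"                          # STACK_GLOBAL
    b"."                             # STOP
)

if sys.version_info < (3, 14):
    # find_class imports collections, then walks OrderedDict -> fromkeys.
    fromkeys = pickle.loads(payload)
    print(fromkeys(["a"]))  # an OrderedDict mapping 'a' -> None
```

This is exactly why a scanner that matches only the literal `("module", "name")` pair can be bypassed: the unpickler itself will happily resolve nested attributes appended to the name.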


🏁 Scripts executed: a series of shell verifications against `modelaudit/scanners/pickle_scanner.py` and `tests/scanners/test_pickle_scanner.py` (locating `_is_risky_ml_import` and its call sites, checking protocol handling, and searching the test suite for coverage of dotted-attribute references). Full transcripts collapsed.

Treat exact risky refs as dotted prefixes—fix bypass via torch.compile.__globals__ and similar nested attributes.

The current _is_risky_ml_import() implementation only matches exact (module, function) tuples and fails to catch dotted children of exact refs. For example, a pickle with GLOBAL "torch" "compile.__globals__" would not be flagged because the loop checks ("torch", "compile.__globals__") against RISKY_ML_EXACT_REFS, which expects only ("torch", "compile"). Since CPython 3.10–3.13 resolves dotted names via attribute traversal for protocols ≥ 4, this is a real security bypass.

Add a check before the tuple split to test whether any prefix of the full dotted reference matches an exact ref:

+RISKY_ML_EXACT_FULL_REFS: frozenset[str] = frozenset(
+    f"{module}.{name}" for module, name in RISKY_ML_EXACT_REFS
+)
+
 def _is_risky_ml_import(mod: str, func: str) -> bool:
     """Return True when module/function matches risky ML import policy."""
     full_ref = f"{mod}.{func}" if func else mod
     parts = full_ref.split(".")
 
+    for i in range(1, len(parts) + 1):
+        candidate = ".".join(parts[:i])
+        if candidate in RISKY_ML_EXACT_FULL_REFS:
+            return True
+
     for i in range(1, len(parts)):
         candidate_mod = ".".join(parts[:i])
         candidate_func = ".".join(parts[i:])
         if (candidate_mod, candidate_func) in RISKY_ML_EXACT_REFS:
             return True

Add a test case for ("torch", "compile.__globals__") and ("torch.storage", "_load_from_bytes.__code__") to prevent regression.
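The bypass and the proposed prefix check can be reproduced without torch installed. This sketch mirrors the review's diff, not the merged code:

```python
# Exact refs as (module, function) tuples -- the pre-fix representation.
RISKY_ML_EXACT_REFS = {("torch", "compile"), ("torch.storage", "_load_from_bytes")}

def naive_match(mod: str, func: str) -> bool:
    """Pre-fix logic: every split point of the dotted ref is tested as a
    (module, function) tuple, so nested attributes evade the exact set."""
    parts = f"{mod}.{func}".split(".")
    return any(
        (".".join(parts[:i]), ".".join(parts[i:])) in RISKY_ML_EXACT_REFS
        for i in range(1, len(parts))
    )

# Proposed fix: flatten exact refs and match any leading dotted prefix.
RISKY_ML_EXACT_FULL_REFS = {f"{m}.{n}" for m, n in RISKY_ML_EXACT_REFS}

def prefix_match(mod: str, func: str) -> bool:
    parts = f"{mod}.{func}".split(".")
    return any(".".join(parts[:i]) in RISKY_ML_EXACT_FULL_REFS
               for i in range(1, len(parts) + 1))

print(naive_match("torch", "compile"))               # caught
print(naive_match("torch", "compile.__globals__"))   # bypass: not caught
print(prefix_match("torch", "compile.__globals__"))  # caught after the fix
```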

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@modelaudit/scanners/pickle_scanner.py` around lines 658-681, the
_is_risky_ml_import logic must treat exact risky refs as dotted prefixes: update
the function that checks RISKY_ML_EXACT_REFS (the code path that receives module
and name like ("torch","compile.__globals__")) to first test whether any leading
dotted prefix of the full name (e.g. "compile" from "compile.__globals__")
matches an entry in RISKY_ML_EXACT_REFS before splitting into parent/child; in
practice modify _is_risky_ml_import to iterate name.split(".") prefixes (or
progressively rpartition on ".") and check (module, prefix) against
RISKY_ML_EXACT_REFS, falling back to the existing RISKY_ML_PARENT_CHILD_REFS and
RISKY_ML_MODULE_PREFIXES checks; add unit tests asserting that
("torch","compile.__globals__") and
("torch.storage","_load_from_bytes.__code__") are flagged to prevent
regressions.



def _is_dangerous_module(mod: str) -> bool:
"""Check if module is in ALWAYS_DANGEROUS_MODULES (exact or prefix match).
@@ -1905,6 +1930,26 @@ def _is_safe_ml_global(mod: str, func: str) -> bool:
return False


def _is_risky_ml_import(mod: str, func: str) -> bool:
"""Return True when module/function matches risky ML import policy."""
full_ref = f"{mod}.{func}" if func else mod
parts = full_ref.split(".")

for i in range(1, len(parts)):
candidate_mod = ".".join(parts[:i])
candidate_func = ".".join(parts[i:])
if (candidate_mod, candidate_func) in RISKY_ML_EXACT_REFS:
return True
if (candidate_mod, candidate_func) in RISKY_ML_PARENT_CHILD_REFS:
return True
if any(
candidate_mod == prefix or candidate_mod.startswith(f"{prefix}.") for prefix in RISKY_ML_MODULE_PREFIXES
):
return True

return False


def _is_copyreg_extension_ref(mod: str) -> bool:
"""Return True when a reference came from an EXT opcode extension lookup."""
return mod == COPYREG_EXTENSION_MODULE
@@ -1954,6 +1999,13 @@ def _is_actually_dangerous_global(mod: str, func: str, ml_context: dict) -> bool
logger.warning(f"Extension-registry callable detected via EXT opcode: {full_ref}")
return True

# STEP 0.5: Risky ML imports should be flagged even in import-only payloads.
# These are intentionally separate from the broad ML safe allowlist because
# they map to runtime loading/compilation pathways with elevated risk.
if _is_risky_ml_import(mod, func):
logger.warning(f"Risky ML import detected: {full_ref}")
return True
Comment on lines +2002 to +2007
Contributor
⚠️ Potential issue | 🟠 Major

Mirror the risky-ML short-circuit inside is_dangerous_reduce_pattern().

Line 2748 still returns early on _is_safe_ml_global(), so GLOBAL torch jit + REDUCE is missed here because torch.jit remains allowlisted. That leaves a misleading "passed" Reduce Pattern Analysis check even though the per-opcode REDUCE path now flags the same ref as risky.

🔧 Proposed fix
     def _is_dangerous_ref(mod: str, func: str) -> bool:
         """Check if a module.function reference is dangerous enough to flag."""
         if _is_copyreg_extension_ref(mod):
             return True
+        if _is_risky_ml_import(mod, func):
+            return True
 
         full_ref = f"{mod}.{func}"

Based on learnings, "Preserve or strengthen security detections; test both benign and malicious samples when adding scanner/feature changes".

Also applies to: 2734-2751

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@modelaudit/scanners/pickle_scanner.py` around lines 2002-2007,
is_dangerous_reduce_pattern() currently short-circuits on _is_safe_ml_global(),
letting allowed globals like torch.jit bypass REDUCE checks; replicate the
risky-ML short-circuit used elsewhere by calling _is_risky_ml_import(mod, func)
before the _is_safe_ml_global() early return and, if it returns True, log the
risky import (use the same logger.warning message format) and return True so
GLOBAL torch jit + REDUCE is flagged; update the block around
is_dangerous_reduce_pattern() (the logic handling mod/func and the early-return)
to mirror the treatment in the import-only path.


# STEP 1: ALWAYS flag dangerous functions first (no exceptions, no allowlist override)
# This MUST come before the ML_SAFE_GLOBALS check to prevent bypass attacks
# where an attacker places dangerous functions (e.g., operator.attrgetter) in a
@@ -2588,6 +2640,10 @@ def is_suspicious_global(mod: str, func: str) -> bool:
First checks against ML_SAFE_GLOBALS allowlist to reduce false positives
for legitimate ML framework operations.
"""
# STEP 0: Always flag risky ML imports before any allowlist checks.
if _is_risky_ml_import(mod, func):
return True

# STEP 1: Check ML_SAFE_GLOBALS allowlist first
# If the module.function is in the safe list, it's not suspicious
if mod in ML_SAFE_GLOBALS:
@@ -5055,7 +5111,7 @@ def get_depth(x):
0,
),
},
why=get_import_explanation(module_name)
why=get_import_explanation(f"{module_name}.{func_name}")
if module_name
else "A dangerous pattern was detected that could execute arbitrary code during unpickling.",
)