diff --git a/bandit/formatters/sarif.py b/bandit/formatters/sarif.py index 5b06ce71d..c224a5a57 100644 --- a/bandit/formatters/sarif.py +++ b/bandit/formatters/sarif.py @@ -10,125 +10,93 @@ SARIF formatter =============== -This formatter outputs the issues in SARIF formatted JSON. +This formatter outputs issues in SARIF formatted JSON. -:Example: +Example: -.. code-block:: javascript +.. code-block:: pycon - { - "runs": [ + >>> from bandit.formatters import sarif + >>> # manager is a BanditManager, tmp is a writable file-like object + >>> sarif.report(manager, tmp, 'LOW', 'LOW') + +Example SARIF output (truncated): + +.. code-block:: json + + { + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ { "tool": { "driver": { "name": "Bandit", "organization": "PyCQA", + "semanticVersion": "X.Y.Z", + "version": "X.Y.Z", "rules": [ { - "id": "B101", - "name": "assert_used", + "id": "B104", + "name": "hardcoded_bind_all_interfaces", + "defaultConfiguration": { "level": "error" }, "properties": { - "tags": [ - "security", - "external/cwe/cwe-703" - ], - "precision": "high" - }, - "helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html" + "tags": ["security", "external/cwe/cwe-605"], + "precision": "medium", + "cwe": "CWE-605" + } } - ], - "version": "1.7.8", - "semanticVersion": "1.7.8" - } - }, - "invocations": [ - { - "executionSuccessful": true, - "endTimeUtc": "2024-03-05T03:28:48Z" - } - ], - "properties": { - "metrics": { - "_totals": { - "loc": 1, - "nosec": 0, - "skipped_tests": 0, - "SEVERITY.UNDEFINED": 0, - "CONFIDENCE.UNDEFINED": 0, - "SEVERITY.LOW": 1, - "CONFIDENCE.LOW": 0, - "SEVERITY.MEDIUM": 0, - "CONFIDENCE.MEDIUM": 0, - "SEVERITY.HIGH": 0, - "CONFIDENCE.HIGH": 1 - }, - "./examples/assert.py": { - "loc": 1, - "nosec": 0, - "skipped_tests": 0, - "SEVERITY.UNDEFINED": 0, - "SEVERITY.LOW": 1, - "SEVERITY.MEDIUM": 0, - "SEVERITY.HIGH": 0, - "CONFIDENCE.UNDEFINED": 0, - "CONFIDENCE.LOW": 0, - 
"CONFIDENCE.MEDIUM": 0, - "CONFIDENCE.HIGH": 1 - } + ] } }, "results": [ { - "message": { - "text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code." - }, - "level": "note", + "ruleId": "B104", + "message": { "text": "Possible binding to all interfaces." }, "locations": [ { "physicalLocation": { - "region": { - "snippet": { - "text": "assert True\n" - }, - "endColumn": 11, - "endLine": 1, - "startColumn": 0, - "startLine": 1 - }, - "artifactLocation": { - "uri": "examples/assert.py" - }, - "contextRegion": { - "snippet": { - "text": "assert True\n" - }, - "endLine": 1, - "startLine": 1 - } + "artifactLocation": { "uri": "binding.py" }, + "region": { "startLine": 4, "endLine": 4 } } } ], "properties": { - "issue_confidence": "HIGH", - "issue_severity": "LOW" + "issue_confidence": "MEDIUM", + "issue_severity": "MEDIUM", + "original_path": "binding.py", + "tags": ["bandit", "B104", "CWE-605"] }, - "ruleId": "B101", - "ruleIndex": 0 + "partialFingerprints": { + "primaryLocationLineHash": "…sha256-hex…" + } + } + ], + "invocations": [ + { + "executionSuccessful": true, + "endTimeUtc": "2024-01-01T00:00:00Z" } - ] + ], + "properties": { + "metrics": { "...": "…" }, + "original_paths": ["binding.py"] + } } - ], - "version": "2.1.0", - "$schema": "https://json.schemastore.org/sarif-2.1.0.json" + ] } -.. versionadded:: 1.7.8 +.. note:: + SARIF omits the ``level`` field when it equals the default (``"warning"``). -""" # noqa: E501 +.. 
versionadded:: 1.7.8 +""" import datetime +import hashlib import logging import pathlib import sys +import typing as t import urllib.parse as urlparse import sarif_om as om @@ -141,10 +109,13 @@ SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json" SCHEMA_VER = "2.1.0" TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +CONFIDENCE_VALUES: t.Final[t.FrozenSet[str]] = frozenset( + {"high", "medium", "low"} +) def report(manager, fileobj, sev_level, conf_level, lines=-1): - """Prints issues in SARIF format + """Prints issues in SARIF format. :param manager: the bandit manager object :param fileobj: The output file object, which may be sys.stdout @@ -199,15 +170,14 @@ def report(manager, fileobj, sev_level, conf_level, lines=-1): def add_skipped_file_notifications(skips, invocation): - if skips is None or len(skips) == 0: + if not skips: return if invocation.tool_configuration_notifications is None: invocation.tool_configuration_notifications = [] - for skip in skips: - (file_name, reason) = skip - + for file_name, reason in skips: + # Include the raw OS path in the description so it appears in JSON notification = om.Notification( level="error", message=om.Message(text=reason), @@ -215,7 +185,10 @@ def add_skipped_file_notifications(skips, invocation): om.Location( physical_location=om.PhysicalLocation( artifact_location=om.ArtifactLocation( - uri=to_uri(file_name) + uri=to_uri(file_name), + description=om.MultiformatMessageString( + text=file_name + ), ) ) ) @@ -229,28 +202,48 @@ def add_results(issues, run): if run.results is None: run.results = [] + # Accumulate unique rule descriptors and collect original raw paths rules = {} rule_indices = {} + original_paths = set() + for issue in issues: result = create_result(issue, rules, rule_indices) run.results.append(result) - - if len(rules) > 0: + # Track raw path for run-level properties (best-effort) + if fname := getattr(issue, "fname", None): + original_paths.add(fname) + else: + # Fallback to serialized filename if available + 
fname = issue.as_dict().get("filename") + if fname: + original_paths.add(fname) + + if rules: run.tool.driver.rules = list(rules.values()) + # Expose all original (raw) paths for tests/humans + if original_paths: + props = run.properties or {} + props["original_paths"] = sorted([p for p in original_paths if p]) + run.properties = props + def create_result(issue, rules, rule_indices): + """Convert a Bandit issue into a SARIF Result + and register its rule if missing. + """ issue_dict = issue.as_dict() rule, rule_index = create_or_find_rule(issue_dict, rules, rule_indices) + filename_raw = issue_dict["filename"] + filename_uri = to_uri(filename_raw) physical_location = om.PhysicalLocation( - artifact_location=om.ArtifactLocation( - uri=to_uri(issue_dict["filename"]) - ) + artifact_location=om.ArtifactLocation(uri=filename_uri) ) - add_region_and_context_region( + snippet_line_text, _ = add_region_and_context_region( physical_location, issue_dict["line_range"], issue_dict["col_offset"], @@ -258,16 +251,34 @@ def create_result(issue, rules, rule_indices): issue_dict["code"], ) + level = level_from_severity(issue_dict["issue_severity"]) + sarif_level = None if level == "warning" else level + + result_props = { + "issue_confidence": issue_dict["issue_confidence"], + "issue_severity": issue_dict["issue_severity"], + "original_path": filename_raw, + } + + tags = ["bandit", issue_dict.get("test_id", "")] + cwe_id = issue_dict.get("issue_cwe", {}).get("id") + if cwe_id: + tags.append(f"CWE-{cwe_id}") + result_props["tags"] = [t for t in tags if t] + + code_for_fp = snippet_line_text or "" + primary_fp = _make_partial_fingerprint( + issue_dict["filename"], issue_dict["test_id"], code_for_fp + ) + return om.Result( rule_id=rule.id, rule_index=rule_index, message=om.Message(text=issue_dict["issue_text"]), - level=level_from_severity(issue_dict["issue_severity"]), + level=sarif_level, locations=[om.Location(physical_location=physical_location)], - properties={ - 
def _precision_from_confidence(confidence: str) -> str:
    """Map a Bandit confidence label to a SARIF rule ``precision`` value.

    :param confidence: confidence label such as ``"HIGH"``; compared
        case-insensitively. ``None`` or an empty string is tolerated.
    :return: the lower-cased label when it is a member of
        ``CONFIDENCE_VALUES``, otherwise ``"medium"``
    """
    # ``or ""`` keeps a missing confidence from raising AttributeError.
    normalized = (confidence or "").lower()
    return normalized if normalized in CONFIDENCE_VALUES else "medium"


def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Populate ``region`` and, when code is given, ``context_region``.

    :param physical_location: object whose ``region`` attribute (and
        ``context_region`` attribute, when a code snippet is available)
        is assigned
    :param line_range: non-empty sequence of line numbers; the first
        entry is the start line and the second, if present, the end line
    :param col_offset: start column offset, or ``None`` to omit the
        start column
    :param end_col_offset: end column offset, or ``None`` to omit the
        end column
    :param code: numbered code snippet understood by ``parse_code``;
        a falsy value means no snippet is attached
    :return: tuple ``(snippet_line_text, context_snippet_text)`` where
        ``snippet_line_text`` is the flagged line without its trailing
        newline (``""`` when unavailable) and ``context_snippet_text``
        is the full snippet text (``None`` when unavailable)
    """
    snippet_line_text = ""
    context_snippet_text = None
    snippet = None
    context_region = None

    if code:
        first_line_number, snippet_lines = parse_code(code)

        # Position of the flagged line inside the parsed snippet. The
        # index is deliberately NOT clamped: if the snippet does not
        # contain the flagged line, emitting no snippet is better than
        # quoting an unrelated line (and fingerprinting it).
        start_line_idx = line_range[0] - first_line_number
        if 0 <= start_line_idx < len(snippet_lines):
            snippet_line = snippet_lines[start_line_idx]
            snippet_line_text = snippet_line.rstrip("\n")
            snippet = om.ArtifactContent(text=snippet_line)

        # An empty snippet would give end_line < start_line, so only
        # build the context region when at least one line was parsed.
        if snippet_lines:
            context_snippet_text = "".join(snippet_lines)
            context_region = om.Region(
                start_line=first_line_number,
                end_line=first_line_number + len(snippet_lines) - 1,
                snippet=om.ArtifactContent(text=context_snippet_text),
            )

    physical_location.region = om.Region(
        start_line=line_range[0],
        end_line=line_range[1] if len(line_range) > 1 else line_range[0],
        # Offsets are incremented by one (presumably converting 0-based
        # offsets to SARIF's 1-based columns -- confirm against caller).
        start_column=(col_offset + 1) if col_offset is not None else None,
        end_column=(
            (end_col_offset + 1) if end_col_offset is not None else None
        ),
        snippet=snippet,
    )

    if context_region is not None:
        physical_location.context_region = context_region

    return snippet_line_text, context_snippet_text
def _make_partial_fingerprint(
    filename: str,
    test_id: str,
    code_line: str,
) -> str:
    """Return a deterministic fingerprint for one finding.

    The value is the SHA-256 hex digest (64 characters) of
    ``"<filename>|<test_id>|<code_line>"``. Line numbers are not part
    of the input, so the fingerprint is unchanged when the same line of
    code moves within the same file.

    :param filename: path of the file the finding was reported in
    :param test_id: identifier of the rule that fired
    :param code_line: representative line of code (may be ``""``)
    :return: 64-character lowercase hexadecimal string
    """
    data = f"{filename}|{test_id}|{code_line}".encode()
    # A SHA-256 hex digest is already exactly 64 characters long.
    return hashlib.sha256(data).hexdigest()


def to_uri(file_path):
    """Convert a file path into a URI string for ``artifactLocation.uri``.

    :param file_path: path of the file as a string
    :return: a ``file://`` URI for an absolute path; for a relative
        path, the POSIX-style path with special characters
        percent-encoded (a relative URI reference)
    """
    pure_path = pathlib.PurePath(file_path)
    if pure_path.is_absolute():
        # The field must hold a URI, not a raw OS path: backslashes,
        # drive letters and spaces are not valid there.
        return pure_path.as_uri()

    # Relative path: forward slashes, then %-encode special characters.
    return urlparse.quote(pure_path.as_posix())