|
| 1 | +# Copyright (c) Microsoft. All Rights Reserved. |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | +# |
| 5 | +# Note: this code was mostly incorporated from |
| 6 | +# https://github.com/microsoft/bandit-sarif-formatter |
| 7 | +# |
| 8 | +r""" |
| 9 | +=============== |
| 10 | +SARIF formatter |
| 11 | +=============== |
| 12 | +
|
| 13 | +This formatter outputs the issues in SARIF formatted JSON. |
| 14 | +
|
| 15 | +:Example: |
| 16 | +
|
| 17 | +.. code-block:: javascript |
| 18 | +
|
| 19 | + { |
| 20 | + "runs": [ |
| 21 | + { |
| 22 | + "tool": { |
| 23 | + "driver": { |
| 24 | + "name": "Bandit", |
| 25 | + "organization": "PyCQA", |
| 26 | + "rules": [ |
| 27 | + { |
| 28 | + "id": "B101", |
| 29 | + "name": "assert_used", |
| 30 | + "properties": { |
| 31 | + "tags": [ |
| 32 | + "security", |
| 33 | + "external/cwe/cwe-703" |
| 34 | + ], |
| 35 | + "precision": "high" |
| 36 | + }, |
| 37 | + "helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html" |
| 38 | + } |
| 39 | + ], |
| 40 | + "version": "1.7.8", |
| 41 | + "semanticVersion": "1.7.8" |
| 42 | + } |
| 43 | + }, |
| 44 | + "invocations": [ |
| 45 | + { |
| 46 | + "executionSuccessful": true, |
| 47 | + "endTimeUtc": "2024-03-05T03:28:48Z" |
| 48 | + } |
| 49 | + ], |
| 50 | + "properties": { |
| 51 | + "metrics": { |
| 52 | + "_totals": { |
| 53 | + "loc": 1, |
| 54 | + "nosec": 0, |
| 55 | + "skipped_tests": 0, |
| 56 | + "SEVERITY.UNDEFINED": 0, |
| 57 | + "CONFIDENCE.UNDEFINED": 0, |
| 58 | + "SEVERITY.LOW": 1, |
| 59 | + "CONFIDENCE.LOW": 0, |
| 60 | + "SEVERITY.MEDIUM": 0, |
| 61 | + "CONFIDENCE.MEDIUM": 0, |
| 62 | + "SEVERITY.HIGH": 0, |
| 63 | + "CONFIDENCE.HIGH": 1 |
| 64 | + }, |
| 65 | + "./examples/assert.py": { |
| 66 | + "loc": 1, |
| 67 | + "nosec": 0, |
| 68 | + "skipped_tests": 0, |
| 69 | + "SEVERITY.UNDEFINED": 0, |
| 70 | + "SEVERITY.LOW": 1, |
| 71 | + "SEVERITY.MEDIUM": 0, |
| 72 | + "SEVERITY.HIGH": 0, |
| 73 | + "CONFIDENCE.UNDEFINED": 0, |
| 74 | + "CONFIDENCE.LOW": 0, |
| 75 | + "CONFIDENCE.MEDIUM": 0, |
| 76 | + "CONFIDENCE.HIGH": 1 |
| 77 | + } |
| 78 | + } |
| 79 | + }, |
| 80 | + "results": [ |
| 81 | + { |
| 82 | + "message": { |
| 83 | + "text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code." |
| 84 | + }, |
| 85 | + "level": "note", |
| 86 | + "locations": [ |
| 87 | + { |
| 88 | + "physicalLocation": { |
| 89 | + "region": { |
| 90 | + "snippet": { |
| 91 | + "text": "assert True\n" |
| 92 | + }, |
| 93 | + "endColumn": 12, |
| 94 | + "endLine": 1, |
| 95 | + "startColumn": 1, |
| 96 | + "startLine": 1 |
| 97 | + }, |
| 98 | + "artifactLocation": { |
| 99 | + "uri": "examples/assert.py" |
| 100 | + }, |
| 101 | + "contextRegion": { |
| 102 | + "snippet": { |
| 103 | + "text": "assert True\n" |
| 104 | + }, |
| 105 | + "endLine": 1, |
| 106 | + "startLine": 1 |
| 107 | + } |
| 108 | + } |
| 109 | + } |
| 110 | + ], |
| 111 | + "properties": { |
| 112 | + "issue_confidence": "HIGH", |
| 113 | + "issue_severity": "LOW" |
| 114 | + }, |
| 115 | + "ruleId": "B101", |
| 116 | + "ruleIndex": 0 |
| 117 | + } |
| 118 | + ] |
| 119 | + } |
| 120 | + ], |
| 121 | + "version": "2.1.0", |
| 122 | + "$schema": "https://json.schemastore.org/sarif-2.1.0.json" |
| 123 | + } |
| 124 | +
|
| 125 | +.. versionadded:: 1.7.8 |
| 126 | +
|
| 127 | +""" # noqa: E501 |
| 128 | +import logging |
| 129 | +import pathlib |
| 130 | +import sys |
| 131 | +import urllib.parse as urlparse |
| 132 | +from datetime import datetime |
| 133 | + |
| 134 | +import sarif_om as om |
| 135 | +from jschema_to_python.to_json import to_json |
| 136 | + |
| 137 | +import bandit |
| 138 | +from bandit.core import docs_utils |
| 139 | + |
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# URI of the SARIF JSON schema; passed as the log's schema URI in report().
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json"
# SARIF version string passed as the log's version in report().
SCHEMA_VER = "2.1.0"
# strftime format used for the invocation end time in report().
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
| 144 | + |
| 145 | + |
def report(manager, fileobj, sev_level, conf_level, lines=-1):
    """Prints issues in SARIF format

    Builds a SARIF log with a single run, attaches a notification for
    every skipped file and a result for every reported issue, then writes
    the serialized log to ``fileobj``.

    :param manager: the bandit manager object
    :param fileobj: The output file object, which may be sys.stdout
    :param sev_level: Filtering severity level
    :param conf_level: Filtering confidence level
    :param lines: Number of lines to report, -1 for all (currently not
        used by this formatter)
    """
    # Imported locally so the fix does not depend on the module-level
    # imports. ``datetime.utcnow()`` is deprecated since Python 3.12 and
    # returns a naive datetime; an aware UTC datetime formats identically
    # with TS_FORMAT.
    from datetime import timezone

    end_time_utc = datetime.now(timezone.utc).strftime(TS_FORMAT)

    log = om.SarifLog(
        schema_uri=SCHEMA_URI,
        version=SCHEMA_VER,
        runs=[
            om.Run(
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name="Bandit",
                        organization=bandit.__author__,
                        semantic_version=bandit.__version__,
                        version=bandit.__version__,
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=end_time_utc,
                        execution_successful=True,
                    )
                ],
                properties={"metrics": manager.metrics.data},
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    skips = manager.get_skipped()
    add_skipped_file_notifications(skips, invocation)

    issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

    add_results(issues, run)

    serialized_log = to_json(log)

    # fileobj is used as a context manager around the write, so its
    # __exit__ runs (closing a regular file) once the log is written.
    with fileobj:
        fileobj.write(serialized_log)

    # Only announce the destination when it is not standard output.
    if fileobj.name != sys.stdout.name:
        LOG.info("SARIF output written to file: %s", fileobj.name)
| 197 | + |
| 198 | + |
def add_skipped_file_notifications(skips, invocation):
    """Attach one tool-configuration notification per skipped file.

    Does nothing when ``skips`` is ``None`` or empty. Otherwise the
    invocation's ``tool_configuration_notifications`` list is created if
    it is ``None`` and one "error"-level notification is appended per
    entry.

    :param skips: sequence of ``(file_name, reason)`` pairs, or None
    :param invocation: the SARIF invocation object to update in place
    """
    if skips is None or len(skips) == 0:
        return

    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    for file_name, reason in skips:
        # The skip reason becomes the message text; the file becomes the
        # notification's only location.
        message = om.Message(text=reason)
        artifact = om.ArtifactLocation(uri=to_uri(file_name))
        location = om.Location(
            physical_location=om.PhysicalLocation(artifact_location=artifact)
        )
        invocation.tool_configuration_notifications.append(
            om.Notification(
                level="error",
                message=message,
                locations=[location],
            )
        )
| 224 | + |
| 225 | + |
def add_results(issues, run):
    """Append a SARIF result for each issue and register the rules used.

    ``run.results`` is created when it is ``None``. The rules collected
    while creating results are stored on ``run.tool.driver.rules``, but
    only when at least one rule was collected.

    :param issues: iterable of issue objects to convert
    :param run: the SARIF run object to update in place
    """
    if run.results is None:
        run.results = []

    # Both mappings are keyed by rule id and filled in by create_result.
    rules, rule_indices = {}, {}
    run.results.extend(
        create_result(issue, rules, rule_indices) for issue in issues
    )

    if rules:
        run.tool.driver.rules = list(rules.values())
| 238 | + |
| 239 | + |
def create_result(issue, rules, rule_indices):
    """Convert a single issue into a SARIF result.

    The issue is read through its ``as_dict()`` form. Its rule is looked
    up, or created, via ``create_or_find_rule``, which updates ``rules``
    and ``rule_indices``.

    :param issue: issue object exposing ``as_dict()``
    :param rules: mapping of rule id to rule, updated in place
    :param rule_indices: mapping of rule id to rule index, updated in place
    :return: the SARIF result object for the issue
    """
    details = issue.as_dict()

    rule, rule_index = create_or_find_rule(details, rules, rule_indices)

    artifact_location = om.ArtifactLocation(uri=to_uri(details["filename"]))
    location = om.PhysicalLocation(artifact_location=artifact_location)

    # Fills in the region (and the context region, when code is available)
    # on the physical location.
    add_region_and_context_region(
        location,
        details["line_range"],
        details["col_offset"],
        details["end_col_offset"],
        details["code"],
    )

    rule_id = rule.id
    message = om.Message(text=details["issue_text"])
    level = level_from_severity(details["issue_severity"])
    locations = [om.Location(physical_location=location)]
    properties = {
        "issue_confidence": details["issue_confidence"],
        "issue_severity": details["issue_severity"],
    }

    return om.Result(
        rule_id=rule_id,
        rule_index=rule_index,
        message=message,
        level=level,
        locations=locations,
        properties=properties,
    )
| 270 | + |
| 271 | + |
def level_from_severity(severity):
    """Translate a severity name into a SARIF result level.

    :param severity: severity name such as "HIGH", "MEDIUM" or "LOW"
    :return: "error" for "HIGH", "warning" for "MEDIUM", "note" for "LOW",
        and "warning" for any other value
    """
    levels = {"HIGH": "error", "MEDIUM": "warning", "LOW": "note"}
    # Unrecognized severities fall back to "warning".
    return levels.get(severity, "warning")
| 281 | + |
| 282 | + |
def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Set the region, and optionally the context region, of a location.

    The region spans ``line_range`` and uses the given column offsets
    shifted by one. When ``code`` is non-empty, the region also carries
    the text of its first line as a snippet, and a context region covering
    the whole excerpt is added.

    :param physical_location: SARIF physical location, updated in place
    :param line_range: sequence of line numbers; the first entry is the
        start line and the second, when present, is the end line
    :param col_offset: start column offset; 1 is added for the output
    :param end_col_offset: end column offset; 1 is added for the output
    :param code: numbered code excerpt understood by ``parse_code``, or a
        falsy value when no code is available
    """
    excerpt = parse_code(code) if code else None

    snippet = None
    if excerpt is not None:
        first_line_number, snippet_lines = excerpt
        # NOTE(review): assumes line_range[0] lies inside the excerpt;
        # confirm against the code that produces ``code``.
        snippet = om.ArtifactContent(
            text=snippet_lines[line_range[0] - first_line_number]
        )

    physical_location.region = om.Region(
        start_line=line_range[0],
        end_line=line_range[1] if len(line_range) > 1 else line_range[0],
        start_column=col_offset + 1,
        end_column=end_col_offset + 1,
        snippet=snippet,
    )

    if excerpt is None:
        return

    # The context region covers every line of the excerpt.
    physical_location.context_region = om.Region(
        start_line=first_line_number,
        end_line=first_line_number + len(snippet_lines) - 1,
        snippet=om.ArtifactContent(text="".join(snippet_lines)),
    )
| 307 | + |
| 308 | + |
def parse_code(code):
    """Split a numbered code excerpt into its start line and text lines.

    Each line of ``code`` is expected to look like
    ``"<line number> <source text>"``. The number is separated from the
    text at the first space. A line without a space is treated as having
    empty text instead of raising ``IndexError``.

    :param code: the numbered code excerpt as one string
    :return: tuple ``(first_line_number, snippet_lines)``, where
        ``first_line_number`` is the number parsed from the first line
        (0 when there are no lines) and ``snippet_lines`` holds the text
        of each line with its line number removed; every entry ends in a
        newline, except the last one when ``code`` itself does not
    :raises ValueError: if the first line does not start with an integer
    """
    code_lines = code.split("\n")

    # The last element from the split is empty when the last "real" line
    # ended in a newline; drop that artifact and remember the fact.
    ends_with_newline = not code_lines[-1]
    if ends_with_newline:
        code_lines.pop()

    if not code_lines:
        return 0, []

    first_line_number = int(code_lines[0].partition(" ")[0])
    snippet_lines = [line.partition(" ")[2] + "\n" for line in code_lines]

    if not ends_with_newline:
        # Undo the newline added to a final line that did not have one.
        snippet_lines[-1] = snippet_lines[-1][:-1]

    return first_line_number, snippet_lines
| 338 | + |
| 339 | + |
def create_or_find_rule(issue_dict, rules, rule_indices):
    """Return the rule and rule index for an issue, creating them if new.

    Rules are keyed by the issue's ``test_id``. A new rule gets the next
    free index, which is the number of rules registered so far.

    :param issue_dict: dictionary form of an issue
    :param rules: mapping of rule id to rule, updated in place
    :param rule_indices: mapping of rule id to rule index, updated in place
    :return: tuple ``(rule, rule_index)``
    """
    rule_id = issue_dict["test_id"]

    if rule_id not in rules:
        descriptor = om.ReportingDescriptor(
            id=rule_id,
            name=issue_dict["test_name"],
            help_uri=docs_utils.get_url(rule_id),
            properties={
                "tags": [
                    "security",
                    f"external/cwe/cwe-{issue_dict['issue_cwe'].get('id')}",
                ],
                "precision": issue_dict["issue_confidence"].lower(),
            },
        )
        # The index is taken before the rule is stored, so it equals the
        # rule's position in insertion order.
        rule_indices[rule_id] = len(rules)
        rules[rule_id] = descriptor

    return rules[rule_id], rule_indices[rule_id]
| 362 | + |
| 363 | + |
def to_uri(file_path):
    """Convert a file path into a URI or URI reference.

    :param file_path: the path to convert
    :return: the result of ``as_uri()`` for an absolute path; otherwise
        the path in POSIX form with special characters %-encoded
    """
    path = pathlib.PurePath(file_path)
    if not path.is_absolute():
        # Relative path: use forward slashes, then %-encode.
        return urlparse.quote(path.as_posix())
    return path.as_uri()
0 commit comments