Skip to content

Commit 22b4226

Browse files
authored
Add a SARIF output formatter (#1113)
This commit adds a formatter that outputs JSON in a specific SARIF format according to spec at [1]. This code is largely leveraged from an existing implementation found here [2]. SARIF format is very useful for integration into ecosystems such as GitHub's Actions. [1] https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html [2] https://github.com/microsoft/bandit-sarif-formatter Closes #646 Signed-off-by: Eric Brown <[email protected]>
1 parent b603dce commit 22b4226

File tree

8 files changed

+530
-1
lines changed

8 files changed

+530
-1
lines changed

bandit/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@
1616
from bandit.core.issue import * # noqa
1717
from bandit.core.test_properties import * # noqa
1818

19+
__author__ = metadata.metadata("bandit")["Author"]
1920
__version__ = metadata.version("bandit")

bandit/formatters/sarif.py

+372
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,372 @@
1+
# Copyright (c) Microsoft. All Rights Reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Note: this code mostly incorporated from
6+
# https://github.com/microsoft/bandit-sarif-formatter
7+
#
8+
r"""
9+
===============
10+
SARIF formatter
11+
===============
12+
13+
This formatter outputs the issues in SARIF formatted JSON.
14+
15+
:Example:
16+
17+
.. code-block:: javascript
18+
19+
{
20+
"runs": [
21+
{
22+
"tool": {
23+
"driver": {
24+
"name": "Bandit",
25+
"organization": "PyCQA",
26+
"rules": [
27+
{
28+
"id": "B101",
29+
"name": "assert_used",
30+
"properties": {
31+
"tags": [
32+
"security",
33+
"external/cwe/cwe-703"
34+
],
35+
"precision": "high"
36+
},
37+
"helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html"
38+
}
39+
],
40+
"version": "1.7.8",
41+
"semanticVersion": "1.7.8"
42+
}
43+
},
44+
"invocations": [
45+
{
46+
"executionSuccessful": true,
47+
"endTimeUtc": "2024-03-05T03:28:48Z"
48+
}
49+
],
50+
"properties": {
51+
"metrics": {
52+
"_totals": {
53+
"loc": 1,
54+
"nosec": 0,
55+
"skipped_tests": 0,
56+
"SEVERITY.UNDEFINED": 0,
57+
"CONFIDENCE.UNDEFINED": 0,
58+
"SEVERITY.LOW": 1,
59+
"CONFIDENCE.LOW": 0,
60+
"SEVERITY.MEDIUM": 0,
61+
"CONFIDENCE.MEDIUM": 0,
62+
"SEVERITY.HIGH": 0,
63+
"CONFIDENCE.HIGH": 1
64+
},
65+
"./examples/assert.py": {
66+
"loc": 1,
67+
"nosec": 0,
68+
"skipped_tests": 0,
69+
"SEVERITY.UNDEFINED": 0,
70+
"SEVERITY.LOW": 1,
71+
"SEVERITY.MEDIUM": 0,
72+
"SEVERITY.HIGH": 0,
73+
"CONFIDENCE.UNDEFINED": 0,
74+
"CONFIDENCE.LOW": 0,
75+
"CONFIDENCE.MEDIUM": 0,
76+
"CONFIDENCE.HIGH": 1
77+
}
78+
}
79+
},
80+
"results": [
81+
{
82+
"message": {
83+
"text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code."
84+
},
85+
"level": "note",
86+
"locations": [
87+
{
88+
"physicalLocation": {
89+
"region": {
90+
"snippet": {
91+
"text": "assert True\n"
92+
},
93+
"endColumn": 11,
94+
"endLine": 1,
95+
"startColumn": 0,
96+
"startLine": 1
97+
},
98+
"artifactLocation": {
99+
"uri": "examples/assert.py"
100+
},
101+
"contextRegion": {
102+
"snippet": {
103+
"text": "assert True\n"
104+
},
105+
"endLine": 1,
106+
"startLine": 1
107+
}
108+
}
109+
}
110+
],
111+
"properties": {
112+
"issue_confidence": "HIGH",
113+
"issue_severity": "LOW"
114+
},
115+
"ruleId": "B101",
116+
"ruleIndex": 0
117+
}
118+
]
119+
}
120+
],
121+
"version": "2.1.0",
122+
"$schema": "https://json.schemastore.org/sarif-2.1.0.json"
123+
}
124+
125+
.. versionadded:: 1.7.8
126+
127+
""" # noqa: E501
128+
import logging
129+
import pathlib
130+
import sys
131+
import urllib.parse as urlparse
132+
from datetime import datetime
133+
134+
import sarif_om as om
135+
from jschema_to_python.to_json import to_json
136+
137+
import bandit
138+
from bandit.core import docs_utils
139+
140+
LOG = logging.getLogger(__name__)
141+
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json"
142+
SCHEMA_VER = "2.1.0"
143+
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
144+
145+
146+
def report(manager, fileobj, sev_level, conf_level, lines=-1):
    """Prints issues in SARIF format

    Builds a SARIF log with a single run, records skipped files as
    notifications and matching issues as results, then writes the
    serialized log to ``fileobj``.

    :param manager: the bandit manager object
    :param fileobj: The output file object, which may be sys.stdout
    :param sev_level: Filtering severity level
    :param conf_level: Filtering confidence level
    :param lines: Number of lines to report, -1 for all (currently not
        used by this formatter)
    """
    # Imported locally because the module only pulls ``datetime`` out of the
    # ``datetime`` package.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # object; an aware UTC timestamp renders identically through TS_FORMAT.
    end_time_utc = datetime.now(timezone.utc).strftime(TS_FORMAT)

    log = om.SarifLog(
        schema_uri=SCHEMA_URI,
        version=SCHEMA_VER,
        runs=[
            om.Run(
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name="Bandit",
                        organization=bandit.__author__,
                        semantic_version=bandit.__version__,
                        version=bandit.__version__,
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=end_time_utc,
                        execution_successful=True,
                    )
                ],
                properties={"metrics": manager.metrics.data},
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    # Files that could not be scanned are reported on the invocation.
    skips = manager.get_skipped()
    add_skipped_file_notifications(skips, invocation)

    issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

    add_results(issues, run)

    serialized_log = to_json(log)

    # The context manager closes the output object once the log is written.
    with fileobj:
        fileobj.write(serialized_log)

    if fileobj.name != sys.stdout.name:
        LOG.info("SARIF output written to file: %s", fileobj.name)
197+
198+
199+
def add_skipped_file_notifications(skips, invocation):
    """Record each skipped file as a tool configuration notification.

    :param skips: sequence of ``(file_name, reason)`` pairs; ``None`` or an
        empty sequence leaves ``invocation`` untouched
    :param invocation: SARIF invocation object, updated in place
    """
    if not skips:
        return

    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    for file_name, reason in skips:
        notification = om.Notification(
            level="error",
            message=om.Message(text=reason),
            locations=[
                om.Location(
                    physical_location=om.PhysicalLocation(
                        artifact_location=om.ArtifactLocation(
                            uri=to_uri(file_name)
                        )
                    )
                )
            ],
        )

        invocation.tool_configuration_notifications.append(notification)
224+
225+
226+
def add_results(issues, run):
    """Append one SARIF result per issue to ``run`` and publish its rules.

    :param issues: iterable of issues to convert with create_result()
    :param run: SARIF run object, updated in place; ``run.results`` is
        created when it is ``None``
    """
    if run.results is None:
        run.results = []

    # Both mappings are filled by create_result() as new rule ids appear.
    rules = {}
    rule_indices = {}
    run.results.extend(
        create_result(issue, rules, rule_indices) for issue in issues
    )

    # Only set the driver's rule list when at least one rule was produced.
    if rules:
        run.tool.driver.rules = list(rules.values())
238+
239+
240+
def create_result(issue, rules, rule_indices):
    """Convert a single issue into a SARIF result object.

    :param issue: issue object exposing ``as_dict()``
    :param rules: mapping of rule id to rule, shared across calls and
        passed through to create_or_find_rule()
    :param rule_indices: mapping of rule id to rule index, shared across
        calls and passed through to create_or_find_rule()
    :return: the SARIF result describing the issue
    """
    details = issue.as_dict()

    rule, rule_index = create_or_find_rule(details, rules, rule_indices)

    artifact = om.ArtifactLocation(uri=to_uri(details["filename"]))
    location = om.PhysicalLocation(artifact_location=artifact)

    # Fills in the region and context region on the location in place.
    add_region_and_context_region(
        location,
        details["line_range"],
        details["col_offset"],
        details["end_col_offset"],
        details["code"],
    )

    message = om.Message(text=details["issue_text"])
    level = level_from_severity(details["issue_severity"])
    extra = {
        "issue_confidence": details["issue_confidence"],
        "issue_severity": details["issue_severity"],
    }

    return om.Result(
        rule_id=rule.id,
        rule_index=rule_index,
        message=message,
        level=level,
        locations=[om.Location(physical_location=location)],
        properties=extra,
    )
270+
271+
272+
def level_from_severity(severity):
    """Translate a severity name into a SARIF result level.

    :param severity: severity name such as "HIGH", "MEDIUM" or "LOW"
    :return: "error", "warning" or "note"; any unrecognised severity
        yields "warning"
    """
    known_levels = (
        ("HIGH", "error"),
        ("MEDIUM", "warning"),
        ("LOW", "note"),
    )
    for name, level in known_levels:
        if severity == name:
            return level
    return "warning"
281+
282+
283+
def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Attach the issue region and its surrounding context to a location.

    :param physical_location: SARIF physical location, updated in place
    :param line_range: non-empty sequence of line numbers for the issue;
        the first entry is used as the start line and the last entry as
        the end line
    :param col_offset: start column offset; 1 is added when it is written
        to the region
    :param end_col_offset: end column offset; 1 is added when it is
        written to the region
    :param code: numbered code excerpt in the form parse_code() accepts,
        or a falsy value when no excerpt is available
    """
    start_line = line_range[0]
    # line_range may hold just (start, end) or every covered line number.
    # The last entry is the end line in both shapes, whereas index 1 is only
    # correct for the former.
    end_line = line_range[-1]

    snippet = None
    first_line_number = None
    snippet_lines = []
    if code:
        first_line_number, snippet_lines = parse_code(code)
        offset = start_line - first_line_number
        # Skip the snippet rather than fail (or silently pick a line from
        # the end via a negative index) if the excerpt does not cover the
        # start line.
        if 0 <= offset < len(snippet_lines):
            snippet = om.ArtifactContent(text=snippet_lines[offset])

    physical_location.region = om.Region(
        start_line=start_line,
        end_line=end_line,
        start_column=col_offset + 1,
        end_column=end_col_offset + 1,
        snippet=snippet,
    )

    if code:
        # The context region spans the whole excerpt.
        physical_location.context_region = om.Region(
            start_line=first_line_number,
            end_line=first_line_number + len(snippet_lines) - 1,
            snippet=om.ArtifactContent(text="".join(snippet_lines)),
        )
307+
308+
309+
def parse_code(code):
    """Split a numbered code excerpt into its first line number and lines.

    Every line of ``code`` is expected to look like ``"<number> <text>"``.
    Only the number on the first line is parsed.

    :param code: the numbered excerpt, lines separated by ``"\\n"``
    :return: tuple ``(first_line_number, snippet_lines)`` where each
        snippet line has its number prefix removed and ends in a newline,
        except the last one when ``code`` itself did not end in a newline;
        an empty ``code`` yields ``(0, [])``
    :raises ValueError: if the first line does not start with an integer
    """
    numbered_lines = code.split("\n")

    # When the excerpt ends in a newline the split leaves an empty final
    # element that is not a real line.
    ends_in_newline = numbered_lines[-1] == ""
    if ends_in_newline:
        numbered_lines.pop()

    first_line_number = 0
    snippet_lines = []
    for position, numbered_line in enumerate(numbered_lines):
        # partition() tolerates a line with no separator (empty text),
        # where indexing the result of split() would raise IndexError.
        number, _, text = numbered_line.partition(" ")
        if position == 0:
            first_line_number = int(number)
        snippet_lines.append(text + "\n")

    if not ends_in_newline:
        # Undo the newline added above so the output mirrors the input.
        snippet_lines[-1] = snippet_lines[-1][:-1]

    return first_line_number, snippet_lines
338+
339+
340+
def create_or_find_rule(issue_dict, rules, rule_indices):
    """Return the SARIF rule for an issue, creating it on first use.

    :param issue_dict: issue dictionary; ``test_id`` is always read, and
        ``test_name``, ``issue_cwe`` and ``issue_confidence`` are read when
        a new rule has to be created
    :param rules: mapping of rule id to rule, updated in place
    :param rule_indices: mapping of rule id to rule index, updated in place
    :return: tuple ``(rule, index)``
    """
    rule_id = issue_dict["test_id"]
    if rule_id in rules:
        return rules[rule_id], rule_indices[rule_id]

    tags = ["security"]
    # Only emit a CWE tag when an id is present; formatting a missing id
    # would yield the meaningless tag "external/cwe/cwe-None".
    cwe_id = (issue_dict.get("issue_cwe") or {}).get("id")
    if cwe_id is not None:
        tags.append(f"external/cwe/cwe-{cwe_id}")

    rule = om.ReportingDescriptor(
        id=rule_id,
        name=issue_dict["test_name"],
        help_uri=docs_utils.get_url(rule_id),
        properties={
            "tags": tags,
            "precision": issue_dict["issue_confidence"].lower(),
        },
    )

    # New rules are numbered in the order they are first seen.
    index = len(rules)
    rules[rule_id] = rule
    rule_indices[rule_id] = index
    return rule, index
362+
363+
364+
def to_uri(file_path):
    """Convert a file path into a URI suitable for SARIF output.

    :param file_path: the path to convert
    :return: the result of ``as_uri()`` for an absolute path; otherwise the
        path with forward slashes and special characters %-encoded
    """
    path = pathlib.PurePath(file_path)
    if not path.is_absolute():
        # Relative path: normalise separators, then %-encode.
        return urlparse.quote(path.as_posix())
    return path.as_uri()

doc/source/formatters/sarif.rst

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-----
2+
sarif
3+
-----
4+
5+
.. automodule:: bandit.formatters.sarif

doc/source/man/bandit.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ OPTIONS
4444
(-l for LOW, -ll for MEDIUM, -lll for HIGH)
4545
-i, --confidence report only issues of a given confidence level or
4646
higher (-i for LOW, -ii for MEDIUM, -iii for HIGH)
47-
-f {csv,custom,html,json,screen,txt,xml,yaml}, --format {csv,custom,html,json,screen,txt,xml,yaml}
47+
-f {csv,custom,html,json,sarif,screen,txt,xml,yaml}, --format {csv,custom,html,json,sarif,screen,txt,xml,yaml}
4848
specify output format
4949
--msg-template MSG_TEMPLATE
5050
specify output message template (only usable with

0 commit comments

Comments
 (0)