1
1
(*****************************************************************************)
2
2
(* Prelude *)
3
3
(*****************************************************************************)
4
- (* Specification of the Semgrep CLI JSON output format using ATD
4
+ (* Specification of the Semgrep CLI JSON output formats using ATD
5
5
* (see https://atd.readthedocs.io/en/latest/ for more information on ATD).
6
6
*
7
- * This file specifies the JSON format of the 'semgrep scan --json' command,
8
- * as well as the JSON format of messages sent to the Semgrep backend by the
9
- * 'semgrep ci' command, and the format of 'semgrep test --json'.
7
+ * This file specifies the JSON formats of:
8
+ * - the output of the 'semgrep scan --json' command
9
+ * - the output of the 'semgrep test --json' command
10
+ * - the messages sent to (and received from) the Semgrep backend by the
11
+ * 'semgrep ci' command
12
+ *
13
+ * There are other important forms of output which are not specified here:
14
+ * - The semgrep metrics sent to https://metrics.semgrep.dev in
15
+ * semgrep_metrics.atd
16
+ * - The parsing stats of semgrep-core -parsing_stats -json have their own
17
+ * Parsing_stats.atd
18
+ * - The schema for the generic AST dump is in AST_generic_v1.atd
10
19
* For the definition of the Semgrep input (the rules), see rule_schema_v2.atd
11
20
*
12
21
* This file has the _v1 suffix to explicitly represent the
24
33
* should not rely on them as those fields may be changed or removed.
25
34
* They are not part of the "API" of Semgrep.
26
35
*
27
- * !! Again, keep in mind that this file is used both by the CLI to *produce* a
36
+ * Again, keep in mind that this file is used both by the CLI to *produce* a
28
37
* JSON output, and by our backends to *consume* the JSON, including to
29
- * consume the JSON produced by old versions of the CLI. As of Sep 2023,
30
- * our backend is still supporting as far as Semgrep 1.17.0!!
38
+ * consume the JSON produced by old versions of the CLI. As of Nov 2024,
39
+ * our backend still supports versions as far back as Semgrep 1.50.0, released Nov 2023.
40
+ * (see server/semgrep_app/util/cli_version_support.py in the semgrep-app repo)
31
41
*
32
42
*
33
43
* This file is translated into OCaml modules by atdgen. Look for the
54
64
* removing gradually the semgrep-core JSON output.
55
65
* - it was extended to support 'semgrep ci' output to type most messages
56
66
* sent between the Semgrep CLI and the Semgrep backend
57
- *
58
- * There are other important form of outputs which are not specified here:
59
- * - The semgrep metrics sent to https://metrics.semgrep.dev in
60
- * semgrep_metrics.atd
61
- * - The parsing stats of semgrep-core -parsing_stats -json have its own
62
- * Parsing_stats.atd
63
- * - The schema for the generic AST dump is in AST_generic_v1.atd
64
67
*)
65
68
66
69
(* escape hatch *)
@@ -70,7 +73,10 @@ type raw_json <ocaml module="Yojson.Basic" t="t"> = abstract
70
73
(* String aliases *)
71
74
(*****************************************************************************)
72
75
73
- (* File path. less: could convert directly to Path class of pathlib library for Python *)
76
+ (* File path.
77
+ * less: could convert directly to Path class of pathlib library for Python
78
+ * See libs/commons/ATD_string_wrap.ml for more info on those ATD_string_wrap.
79
+ *)
74
80
type fpath <ocaml attr="deriving show, eq"> <python decorator="dataclass(frozen=True)">= string wrap <ocaml module="ATD_string_wrap.Fpath">
75
81
76
82
(* URI. Declared as a string on the ATD side; on the OCaml side the value is
 * wrapped using the ATD_string_wrap.Uri module. *)
type uri = string wrap <ocaml module="ATD_string_wrap.Uri">
@@ -188,17 +194,19 @@ type pro_feature
188
194
proprietary_language: bool;
189
195
}
190
196
191
- (* Report the engine used to detect each finding. Additionally, if we are able to
192
- infer that the finding could only be detected using the pro engine, report that
193
- the pro engine is required and include basic information about which feature is
194
- required.
197
+ (* Report the engine used to detect each finding. Additionally, if we are able
198
+ to infer that the finding could only be detected using the pro engine,
199
+ report that the pro engine is required and include basic information about
200
+ which feature is required.
195
201
196
202
OSS = ran with OSS
197
- PRO = ran with PRO, but we didn't infer that OSS couldn't have found this finding
203
+ PRO = ran with PRO, but we didn't infer that OSS couldn't have found this
204
+ finding
198
205
PRO_REQUIRED = ran with PRO and requires a PRO feature (see pro_feature_used)
199
206
200
207
Note: OSS and PRO could have clearer names, but for backwards compatibility
201
- we're leaving them as is *)
208
+ we're leaving them as is
209
+ *)
202
210
type engine_of_finding
203
211
<ocaml attr="deriving show">
204
212
<python decorator="dataclass(frozen=True)"> = [
@@ -303,7 +311,8 @@ type cli_match = {
303
311
}
304
312
305
313
type cli_match_extra = {
306
- (* TODO: inherit match_extra; but need ?metavars because of dependency_aware code *)
314
+ (* Since 1.98.0, you need to be logged in to get this field.
315
+ * note: we also need ?metavars because of dependency_aware code *)
307
316
?metavars: metavars option;
308
317
309
318
(* Those fields are derived from the rule but the metavariables
@@ -327,7 +336,7 @@ type cli_match_extra = {
327
336
metadata: raw_json;
328
337
severity: match_severity;
329
338
330
- (* added by the CLI (see formatter/json.py) *)
339
+ (* Since 1.98.0, you need to be logged in to get those fields *)
331
340
fingerprint: string;
332
341
lines: string;
333
342
@@ -336,14 +345,14 @@ type cli_match_extra = {
336
345
(* EXPERIMENTAL: added by dependency_aware code *)
337
346
?sca_info: sca_info option;
338
347
339
- (* EXPERIMENTAL: For now, present only for taint findings. May be extended to others
340
- * later on. *)
348
+ (* EXPERIMENTAL: For now, present only for taint findings. May be extended to
349
+ * others later on. *)
341
350
?dataflow_trace: match_dataflow_trace option;
342
351
343
352
?engine_kind: engine_of_finding option;
344
353
345
- (* EXPERIMENTAL: If present indicates the status of postprocessor validation. This
346
- * field not being present should be equivalent to No_validator.
354
+ (* EXPERIMENTAL: If present indicates the status of postprocessor validation.
355
+ * This field not being present should be equivalent to No_validator.
347
356
* Added in semgrep 1.37.0 *)
348
357
?validation_state: validation_state option;
349
358
(* EXPERIMENTAL: added by secrets post-processing & historical scanning code
@@ -360,8 +369,6 @@ type cli_match_extra = {
360
369
361
370
(* Name/value map of the matched metavariables.
362
371
* The leading '$' must be included in the metavariable name.
363
- * TODO: semgrep-core always return a metavars, but dependency_aware Python code
364
- * does not always generate a metavars
365
372
*)
366
373
type metavars = (string * metavar_value) list
367
374
<json repr="object">
0 commit comments