Skip to content

Commit a95d339

Browse files
authored
Comments on minimum cli version and on gated logged in fields (#314)
test plan: make - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend needs to still be able to *consume* data generated by Semgrep 1.17.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
1 parent 1c6af38 commit a95d339

File tree

1 file changed

+36
-29
lines changed

1 file changed

+36
-29
lines changed

semgrep_output_v1.atd

+36-29
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
(*****************************************************************************)
22
(* Prelude *)
33
(*****************************************************************************)
4-
(* Specification of the Semgrep CLI JSON output format using ATD
4+
(* Specification of the Semgrep CLI JSON output formats using ATD
55
* (see https://atd.readthedocs.io/en/latest/ for more information on ATD).
66
*
7-
* This file specifies the JSON format of the 'semgrep scan --json' command,
8-
* as well as the JSON format of messages sent to the Semgrep backend by the
9-
* 'semgrep ci' command, and the format of 'semgrep test --json'.
7+
* This file specifies the JSON formats of:
8+
* - the output of the 'semgrep scan --json' command
9+
* - the output of the 'semgrep test --json' command
10+
* - the messages sent to (and received from) the Semgrep backend by the
11+
* 'semgrep ci' command
12+
*
13+
* There are other important forms of output which are not specified here:
14+
* - The semgrep metrics sent to https://metrics.semgrep.dev in
15+
* semgrep_metrics.atd
16+
* - The parsing stats of semgrep-core -parsing_stats -json have their own
17+
* Parsing_stats.atd
18+
* - The schema for the generic AST dump is in AST_generic_v1.atd
1019
* For the definition of the Semgrep input (the rules), see rule_schema_v2.atd
1120
*
1221
* This file has the _v1 suffix to explicitly represent the
@@ -24,10 +33,11 @@
2433
* should not rely on them as those fields may be changed or removed.
2534
* They are not part of the "API" of Semgrep.
2635
*
27-
* !!Again, keep in mind that this file is used both by the CLI to *produce* a
36+
* Again, keep in mind that this file is used both by the CLI to *produce* a
2837
* JSON output, and by our backends to *consume* the JSON, including to
29-
* consume the JSON produced by old versions of the CLI. As of Sep 2023,
30-
* our backend is still supporting as far as Semgrep 1.17.0!!
38+
* consume the JSON produced by old versions of the CLI. As of Nov 2024,
39+
* our backend still supports Semgrep as far back as 1.50.0 (released Nov 2023).
40+
* (see server/semgrep_app/util/cli_version_support.py in the semgrep-app repo)
3141
*
3242
*
3343
* This file is translated into OCaml modules by atdgen. Look for the
@@ -54,13 +64,6 @@
5464
* removing gradually the semgrep-core JSON output.
5565
* - it was extended to support 'semgrep ci' output to type most messages
5666
* sent between the Semgrep CLI and the Semgrep backend
57-
*
58-
* There are other important form of outputs which are not specified here:
59-
* - The semgrep metrics sent to https://metrics.semgrep.dev in
60-
* semgrep_metrics.atd
61-
* - The parsing stats of semgrep-core -parsing_stats -json have its own
62-
* Parsing_stats.atd
63-
* - The schema for the generic AST dump is in AST_generic_v1.atd
6467
*)
6568

6669
(* escape hatch *)
@@ -70,7 +73,10 @@ type raw_json <ocaml module="Yojson.Basic" t="t"> = abstract
7073
(* String aliases *)
7174
(*****************************************************************************)
7275

73-
(* File path. less: could convert directly to Path class of pathlib library for Python *)
76+
(* File path.
77+
* less: could convert directly to Path class of pathlib library for Python
78+
* See libs/commons/ATD_string_wrap.ml for more info on the ATD_string_wrap modules.
79+
*)
7480
type fpath <ocaml attr="deriving show, eq"> <python decorator="dataclass(frozen=True)">= string wrap <ocaml module="ATD_string_wrap.Fpath">
7581

7682
type uri = string wrap <ocaml module="ATD_string_wrap.Uri">
@@ -188,17 +194,19 @@ type pro_feature
188194
proprietary_language: bool;
189195
}
190196

191-
(* Report the engine used to detect each finding. Additionally, if we are able to
192-
infer that the finding could only be detected using the pro engine, report that
193-
the pro engine is required and include basic information about which feature is
194-
required.
197+
(* Report the engine used to detect each finding. Additionally, if we are able
198+
to infer that the finding could only be detected using the pro engine,
199+
report that the pro engine is required and include basic information about
200+
which feature is required.
195201

196202
OSS = ran with OSS
197-
PRO = ran with PRO, but we didn't infer that OSS couldn't have found this finding
203+
PRO = ran with PRO, but we didn't infer that OSS couldn't have found this
204+
finding
198205
PRO_REQUIRED = ran with PRO and requires a PRO feature (see pro_feature_used)
199206

200207
Note: OSS and PRO could have clearer names, but for backwards compatibility
201-
we're leaving them as is *)
208+
we're leaving them as is
209+
*)
202210
type engine_of_finding
203211
<ocaml attr="deriving show">
204212
<python decorator="dataclass(frozen=True)"> = [
@@ -303,7 +311,8 @@ type cli_match = {
303311
}
304312

305313
type cli_match_extra = {
306-
(* TODO: inherit match_extra; but need ?metavars because of dependency_aware code *)
314+
(* Since 1.98.0, you need to be logged in to get this field.
315+
* note: we also need ?metavars because of dependency_aware code *)
307316
?metavars: metavars option;
308317

309318
(* Those fields are derived from the rule but the metavariables
@@ -327,7 +336,7 @@ type cli_match_extra = {
327336
metadata: raw_json;
328337
severity: match_severity;
329338

330-
(* added by the CLI (see formatter/json.py) *)
339+
(* Since 1.98.0, you need to be logged in to get those fields *)
331340
fingerprint: string;
332341
lines: string;
333342

@@ -336,14 +345,14 @@ type cli_match_extra = {
336345
(* EXPERIMENTAL: added by dependency_aware code *)
337346
?sca_info: sca_info option;
338347

339-
(* EXPERIMENTAL: For now, present only for taint findings. May be extended to others
340-
* later on. *)
348+
(* EXPERIMENTAL: For now, present only for taint findings. May be extended to
349+
* others later on. *)
341350
?dataflow_trace: match_dataflow_trace option;
342351

343352
?engine_kind: engine_of_finding option;
344353

345-
(* EXPERIMENTAL: If present indicates the status of postprocessor validation. This
346-
* field not being present should be equivalent to No_validator.
354+
(* EXPERIMENTAL: If present indicates the status of postprocessor validation.
355+
* This field not being present should be equivalent to No_validator.
347356
* Added in semgrep 1.37.0 *)
348357
?validation_state: validation_state option;
349358
(* EXPERIMENTAL: added by secrets post-processing & historical scanning code
@@ -360,8 +369,6 @@ type cli_match_extra = {
360369

361370
(* Name/value map of the matched metavariables.
362371
* The leading '$' must be included in the metavariable name.
363-
* TODO: semgrep-core always return a metavars, but dependency_aware Python code
364-
* does not always generate a metavars
365372
*)
366373
type metavars = (string * metavar_value) list
367374
<json repr="object">

0 commit comments

Comments
 (0)