Skip to content

Commit

Permalink
Comments on minimum cli version and on gated logged in fields (#314)
Browse files Browse the repository at this point in the history
test plan:
make


- [x] I ran `make setup && make` to update the generated code after
editing a `.atd` file (TODO: have a CI check)
- [x] I made sure we're still backward compatible with old versions of
the CLI.
For example, the Semgrep backend still needs to be able to *consume* data
generated
	  by Semgrep 1.17.0.
See
https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
  • Loading branch information
aryx authored Nov 19, 2024
1 parent 1c6af38 commit a95d339
Showing 1 changed file with 36 additions and 29 deletions.
65 changes: 36 additions & 29 deletions semgrep_output_v1.atd
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* Specification of the Semgrep CLI JSON output format using ATD
(* Specification of the Semgrep CLI JSON output formats using ATD
* (see https://atd.readthedocs.io/en/latest/ for more information on ATD).
*
* This file specifies the JSON format of the 'semgrep scan --json' command,
* as well as the JSON format of messages sent to the Semgrep backend by the
* 'semgrep ci' command, and the format of 'semgrep test --json'.
* This file specifies the JSON formats of:
* - the output of the 'semgrep scan --json' command
* - the output of the 'semgrep test --json' command
* - the messages sent to (and received from) the Semgrep backend by the
* 'semgrep ci' command
*
* There are other important forms of output which are not specified here:
* - The semgrep metrics sent to https://metrics.semgrep.dev in
* semgrep_metrics.atd
* - The parsing stats of semgrep-core -parsing_stats -json have their own
* Parsing_stats.atd
* - The schema for the generic AST dump is in AST_generic_v1.atd
* For the definition of the Semgrep input (the rules), see rule_schema_v2.atd
*
* This file has the _v1 suffix to explicitly represent the
Expand All @@ -24,10 +33,11 @@
* should not rely on them as those fields may be changed or removed.
* They are not part of the "API" of Semgrep.
*
* !!Again, keep in mind that this file is used both by the CLI to *produce* a
* Again, keep in mind that this file is used both by the CLI to *produce* a
* JSON output, and by our backends to *consume* the JSON, including to
* consume the JSON produced by old versions of the CLI. As of Sep 2023,
* our backend is still supporting as far as Semgrep 1.17.0!!
* consume the JSON produced by old versions of the CLI. As of Nov 2024,
* our backend still supports versions as far back as Semgrep 1.50.0 (released Nov 2023).
* (see server/semgrep_app/util/cli_version_support.py in the semgrep-app repo)
*
*
* This file is translated into OCaml modules by atdgen. Look for the
Expand All @@ -54,13 +64,6 @@
* removing gradually the semgrep-core JSON output.
* - it was extended to support 'semgrep ci' output to type most messages
* sent between the Semgrep CLI and the Semgrep backend
*
* There are other important form of outputs which are not specified here:
* - The semgrep metrics sent to https://metrics.semgrep.dev in
* semgrep_metrics.atd
* - The parsing stats of semgrep-core -parsing_stats -json have its own
* Parsing_stats.atd
* - The schema for the generic AST dump is in AST_generic_v1.atd
*)

(* escape hatch *)
Expand All @@ -70,7 +73,10 @@ type raw_json <ocaml module="Yojson.Basic" t="t"> = abstract
(* String aliases *)
(*****************************************************************************)

(* File path. less: could convert directly to Path class of pathlib library for Python *)
(* File path, represented as a plain string in the JSON.
* On the OCaml side the string is wrapped through the ATD_string_wrap.Fpath
* module and the generated type derives 'show' and 'eq'; on the Python side
* the generated class is decorated as a frozen dataclass.
* less: could convert directly to Path class of pathlib library for Python
* See libs/commons/ATD_string_wrap.ml for more info on those ATD_string_wrap.
*)
type fpath <ocaml attr="deriving show, eq"> <python decorator="dataclass(frozen=True)">= string wrap <ocaml module="ATD_string_wrap.Fpath">

(* URI, represented as a plain string in the JSON and wrapped through the
* ATD_string_wrap.Uri module on the OCaml side.
*)
type uri = string wrap <ocaml module="ATD_string_wrap.Uri">
Expand Down Expand Up @@ -188,17 +194,19 @@ type pro_feature
proprietary_language: bool;
}

(* Report the engine used to detect each finding. Additionally, if we are able to
infer that the finding could only be detected using the pro engine, report that
the pro engine is required and include basic information about which feature is
required.
(* Report the engine used to detect each finding. Additionally, if we are able
to infer that the finding could only be detected using the pro engine,
report that the pro engine is required and include basic information about
which feature is required.

OSS = ran with OSS
PRO = ran with PRO, but we didn't infer that OSS couldn't have found this finding
PRO = ran with PRO, but we didn't infer that OSS couldn't have found this
finding
PRO_REQUIRED = ran with PRO and requires a PRO feature (see pro_feature_used)

Note: OSS and PRO could have clearer names, but for backwards compatibility
we're leaving them as is *)
we're leaving them as is
*)
type engine_of_finding
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = [
Expand Down Expand Up @@ -303,7 +311,8 @@ type cli_match = {
}

type cli_match_extra = {
(* TODO: inherit match_extra; but need ?metavars because of dependency_aware code *)
(* Since 1.98.0, you need to be logged in to get this field.
* note: we also need ?metavars because of dependency_aware code *)
?metavars: metavars option;

(* Those fields are derived from the rule but the metavariables
Expand All @@ -327,7 +336,7 @@ type cli_match_extra = {
metadata: raw_json;
severity: match_severity;

(* added by the CLI (see formatter/json.py) *)
(* Since 1.98.0, you need to be logged in to get those fields *)
fingerprint: string;
lines: string;

Expand All @@ -336,14 +345,14 @@ type cli_match_extra = {
(* EXPERIMENTAL: added by dependency_aware code *)
?sca_info: sca_info option;

(* EXPERIMENTAL: For now, present only for taint findings. May be extended to others
* later on. *)
(* EXPERIMENTAL: For now, present only for taint findings. May be extended to
* others later on. *)
?dataflow_trace: match_dataflow_trace option;

?engine_kind: engine_of_finding option;

(* EXPERIMENTAL: If present indicates the status of postprocessor validation. This
* field not being present should be equivalent to No_validator.
(* EXPERIMENTAL: If present indicates the status of postprocessor validation.
* This field not being present should be equivalent to No_validator.
* Added in semgrep 1.37.0 *)
?validation_state: validation_state option;
(* EXPERIMENTAL: added by secrets post-processing & historical scanning code
Expand All @@ -360,8 +369,6 @@ type cli_match_extra = {

(* Name/value map of the matched metavariables.
* The leading '$' must be included in the metavariable name.
* TODO: semgrep-core always return a metavars, but dependency_aware Python code
* does not always generate a metavars
*)
type metavars = (string * metavar_value) list
<json repr="object">
Expand Down

0 comments on commit a95d339

Please sign in to comment.