diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index e535374f..a02303e2 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -1,12 +1,21 @@ (*****************************************************************************) (* Prelude *) (*****************************************************************************) -(* Specification of the Semgrep CLI JSON output format using ATD +(* Specification of the Semgrep CLI JSON output formats using ATD * (see https://atd.readthedocs.io/en/latest/ for more information on ATD). * - * This file specifies the JSON format of the 'semgrep scan --json' command, - * as well as the JSON format of messages sent to the Semgrep backend by the - * 'semgrep ci' command, and the format of 'semgrep test --json'. + * This file specifies the JSON formats of: + * - the output of the 'semgrep scan --json' command + * - the output of the 'semgrep test --json' command + * - the messages sent to (and received from) the Semgrep backend by the + * 'semgrep ci' command + * + * There are other important forms of output which are not specified here: + * - The semgrep metrics sent to https://metrics.semgrep.dev in + * semgrep_metrics.atd + * - The parsing stats of semgrep-core -parsing_stats -json have their own + * Parsing_stats.atd + * - The schema for the generic AST dump is in AST_generic_v1.atd * For the definition of the Semgrep input (the rules), see rule_schema_v2.atd * * This file has the _v1 suffix to explicitely represent the @@ -24,10 +33,11 @@ * should not rely on them as those fields may be changed or removed. * They are not part of the "API" of Semgrep. * - * !!Again, keep in mind that this file is used both by the CLI to *produce* a + * Again, keep in mind that this file is used both by the CLI to *produce* a * JSON output, and by our backends to *consume* the JSON, including to - * consume the JSON produced by old versions of the CLI. As of Sep 2023, - * our backend is still supporting as far as Semgrep 1.17.0!! 
+ * consume the JSON produced by old versions of the CLI. As of Nov 2024, + * our backend is still supporting as far as Semgrep 1.50.0 released Nov 2023. + * (see server/semgrep_app/util/cli_version_support.py in the semgrep-app repo) * * * This file is translated in OCaml modules by atdgen. Look for the @@ -54,13 +64,6 @@ * removing gradually the semgrep-core JSON output. * - it was extended to support 'semgrep ci' output to type most messages * sent between the Semgrep CLI and the Semgrep backend - * - * There are other important form of outputs which are not specified here: - * - The semgrep metrics sent to https://metrics.semgrep.dev in - * semgrep_metrics.atd - * - The parsing stats of semgrep-core -parsing_stats -json have its own - * Parsing_stats.atd - * - The schema for the generic AST dump is in AST_generic_v1.atd *) (* escape hatch *) @@ -70,7 +73,10 @@ type raw_json = abstract (* String aliases *) (*****************************************************************************) -(* File path. less: could convert directly to Path class of pathlib library for Python *) +(* File path. + * less: could convert directly to Path class of pathlib library for Python + * See libs/commons/ATD_string_wrap.ml for more info on those ATD_string_wrap. + *) type fpath = string wrap type uri = string wrap @@ -188,17 +194,19 @@ type pro_feature proprietary_language: bool; } -(* Report the engine used to detect each finding. Additionally, if we are able to - infer that the finding could only be detected using the pro engine, report that - the pro engine is required and include basic information about which feature is - required. +(* Report the engine used to detect each finding. Additionally, if we are able + to infer that the finding could only be detected using the pro engine, + report that the pro engine is required and include basic information about + which feature is required. 
OSS = ran with OSS - PRO = ran with PRO, but we didn't infer that OSS couldn't have found this finding + PRO = ran with PRO, but we didn't infer that OSS couldn't have found this + finding PRO_REQUIRED = ran with PRO and requires a PRO feature (see pro_feature_used) Note: OSS and PRO could have clearer names, but for backwards compatibility - we're leaving them as is *) + we're leaving them as is +*) type engine_of_finding = [ @@ -303,7 +311,8 @@ type cli_match = { } type cli_match_extra = { - (* TODO: inherit match_extra; but need ?metavars because of dependency_aware code *) + (* Since 1.98.0, you need to be logged in to get this field. + * note: we also need ?metavars because of the dependency_aware code *) ?metavars: metavars option; (* Those fields are derived from the rule but the metavariables @@ -327,7 +336,7 @@ type cli_match_extra = { metadata: raw_json; severity: match_severity; - (* added by the CLI (see formatter/json.py) *) + (* Since 1.98.0, you need to be logged in to get those fields *) fingerprint: string; lines: string; @@ -336,14 +345,14 @@ type cli_match_extra = { (* EXPERIMENTAL: added by dependency_aware code *) ?sca_info: sca_info option; - (* EXPERIMENTAL: For now, present only for taint findings. May be extended to others - * later on. *) + (* EXPERIMENTAL: For now, present only for taint findings. May be extended to + * others later on. *) ?dataflow_trace: match_dataflow_trace option; ?engine_kind: engine_of_finding option; - (* EXPERIMENTAL: If present indicates the status of postprocessor validation. This - * field not being present should be equivalent to No_validator. + (* EXPERIMENTAL: If present indicates the status of postprocessor validation. + * This field not being present should be equivalent to No_validator. 
* Added in semgrep 1.37.0 *) ?validation_state: validation_state option; (* EXPERIMENTAL: added by secrets post-processing & historical scanning code @@ -360,8 +369,6 @@ type cli_match_extra = { (* Name/value map of the matched metavariables. * The leading '$' must be included in the metavariable name. - * TODO: semgrep-core always return a metavars, but dependency_aware Python code - * does not always generate a metavars *) type metavars = (string * metavar_value) list