Skip to content

Commit

Permalink
Merge branch 'main' into brandon/import-from-id-info
Browse files Browse the repository at this point in the history
  • Loading branch information
brandonspark authored Jan 15, 2025
2 parents f9586f8 + b54e43b commit 92c5d5d
Show file tree
Hide file tree
Showing 7 changed files with 8,813 additions and 9,231 deletions.
224 changes: 102 additions & 122 deletions semgrep_output_v1.atd
Original file line number Diff line number Diff line change
Expand Up @@ -1120,13 +1120,12 @@ type killing_parent = {
* types used to communicate with the Semgrep backend and are not meant
* to be consumed directly by Semgrep users or tools wrapping Semgrep.
*
* The sequence of HTTP requests is mostly:
* - /deployments/current with token
* The sequence of HTTP requests for 'semgrep ci' is mostly:
* - /api/agent/deployments/current with token
* and response with deployment name in a deployment config
* - /deployments/scans when starting a scan, with information about the project
* and response with scan_id
* - /scans/<scan_id>/config to request the scan config
* and response with scan_config including the rules to use
* (TODO? get rid of this one? useful?)
* - /api/cli/scans when starting a scan, with information about the project
* and response with scan_id and scan_config including the rules to use
* - /results to send the findings to the backend
* and response with errors and task_id
* - /complete when done, with the exit code and some additional information
Expand Down Expand Up @@ -1186,7 +1185,7 @@ type action = [
]

(* ----------------------------- *)
(* CI Configuration *)
(* CI configurations *)
(* ----------------------------- *)

(* Response by the backend to the CLI to the POST deployments/current *)
Expand Down Expand Up @@ -1222,21 +1221,16 @@ type ci_config_from_repo = {
(* A tag, kept as a plain string.
* ex: "webapp"
*)
type tag = string

(* Response by the backend to the CLI to the POST /scans/<scan_id>/config *)
(* Response by the backend to the CLI to the POST deployments/scans/config
* DEPRECATED? seems to be used only by semgrep lsp.
*)
type scan_config = {
(* TODO: remove deployment_id + deployment_name from here, instead this
* will be in scan_response.scan_info
*)
deployment_id: int;
deployment_name: string;
(* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
policy_names: string list;
(* rules raw content in JSON format (but still sent as a string) *)
rule_config: string;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;

(* Deprecated: should rely on ci_config_from_cloud instead *)
inherit features;
inherit triage_ignored;
(* glob patterns *)
Expand All @@ -1245,8 +1239,70 @@ type scan_config = {
?enabled_products: product list option;
(* since 1.64.0 *)
~actions: action list;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans.
* It bundles three records: meta information about the scan, the
* configuration specific to the scan, and the settings for the CLI.
*)
type scan_response = {
(* meta info about the scan (see scan_info) *)
info: scan_info;
(* config specific to the scan (see scan_configuration) *)
config: scan_configuration;
(* settings for the cli (see engine_configuration) *)
engine_params: engine_configuration;
(* TODO: ~actions: action list; *)
}

(* Meta info about the scan; this is the type of the 'info' field of
* scan_response.
*)
type scan_info = {
?id: int option; (* the scan id, null for dry-runs *)
(* NOTE(review): presumably the products enabled for this scan; confirm *)
enabled_products: product list;
(* redundant with deployment_config?
* TODO? remove the intermediate call to get the deployment and
* start a scan to /api/cli/scans/ without first accessing
* api/agent/deployments/current?
*)
deployment_id: int;
deployment_name: string;
}

(* Config specific to the scan, e.g., the rules to use; this is the type of
* the 'config' field of scan_response.
*)
type scan_configuration = {
rules: raw_json; (* TODO: can we type this better? *)
(* the fields of triage_ignored are included as-is in this record *)
inherit triage_ignored;
}

(* Settings for the cli; this is the type of the 'engine_params' field of
* scan_response.
* All the fields declared directly below are marked with '~' (default value
* when absent) or '?' (optional), so none of them is mandatory in the JSON.
*)
type engine_configuration = {
(* the fields of features are included as-is in this record *)
inherit features;
(* TODO? glob list? fpath list? *)
~ignored_files: string list;
(* from 1.71.0 *)
?product_ignored_files: product_ignored_files option;
(* for features we only want to turn on for select customers *)
~generic_slow_rollout: bool;
(* from 1.63.0 *)
?historical_config: historical_configuration option;
(* from 1.93.
* Indicate that fail-open should always be enabled, overriding the CLI flag.
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
*)
~always_suppress_errors: bool;
}

(* a glob pattern, kept as a plain string *)
type glob = string

(* Association list pairing a product with a list of globs.
* NOTE(review): presumably the files to ignore for that product; confirm.
*)
type product_ignored_files = (product * glob list) list
(* We omit the usual <json repr="object"> otherwise we get a
* "keys must be strings" error *)
<python repr="dict"> <ts repr="map">

(* configuration for scanning version control history,
* e.g., looking back at past git commits for committed credentials which may
* have been removed *)
type historical_configuration = {
(* NOTE(review): presumably whether historical scanning is turned on;
* confirm *)
enabled: bool;
(* NOTE(review): presumably how many days of history to look back at;
* the behavior when the field is absent is not visible here, confirm *)
?lookback_days: int option;
}

(* ----------------------------- *)
(* CI Deployment response *)
(* ----------------------------- *)
Expand All @@ -1258,13 +1314,27 @@ type deployment_response = {
(* CI Scan request *)
(* ----------------------------- *)

(* Sent by the CLI to the POST /api/cli/scans to create a scan.
* The backend answers this request with a scan_response.
*)
type scan_request = {
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
project_metadata: project_metadata;
scan_metadata: scan_metadata;

(* added in 1.43; optional *)
?project_config: ci_config_from_repo option;

(* deprecated: moved as an option in 1.100.0 and was duplicative of
* information in project_metadata and scan_metadata since 1.43.0
* old: 'meta: project_metadata;' before 1.43
* Now kept as untyped JSON (raw_json).
*)
?meta: raw_json option;
}

(* Collect information about a project from the environment, filesystem,
* git repo, etc.
* See also semgrep_metrics.atd and PRIVACY.md
*
* TODO:
* - we could split it in different parts and use inherit to make things clearer
* (while still being backward compatible)
* TODO: we could split it in different parts and use inherit to make things
* clearer (while still being backward compatible)
*)
type project_metadata = {
(* TODO: deprecate in favor of scan_metadata.cli_version *)
Expand Down Expand Up @@ -1353,77 +1423,6 @@ type scan_metadata = {
?sms_scan_id: string option;
}

(* Sent by the CLI to the POST /api/cli/scans to create a scan.
* The backend answers this request with a scan_response.
*)
type scan_request = {
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
project_metadata: project_metadata;
scan_metadata: scan_metadata;

(* added in 1.43; optional *)
?project_config: ci_config_from_repo option;

(* deprecated: moved as an option in 1.100.0 and was duplicative of
* information in project_metadata and scan_metadata since 1.43.0
* old: 'meta: project_metadata;' before 1.43
* Now kept as untyped JSON (raw_json).
*)
?meta: raw_json option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans.
* It bundles three records: meta information about the scan, the
* configuration specific to the scan, and the settings for the CLI.
*)
type scan_response = {
(* meta info about the scan (see scan_info) *)
info: scan_info;
(* config specific to the scan (see scan_configuration) *)
config: scan_configuration;
(* settings for the cli (see engine_configuration) *)
engine_params: engine_configuration;
}

(* Meta info about the scan; this is the type of the 'info' field of
* scan_response.
*)
type scan_info = {
?id: int option; (* the scan id, null for dry-runs *)
(* NOTE(review): presumably the products enabled for this scan; confirm *)
enabled_products: product list;
(* NOTE(review): presumably identify the deployment the scan belongs to;
* confirm *)
deployment_id: int;
deployment_name: string;
}



(* Config specific to the scan, e.g., the rules to use; this is the type of
* the 'config' field of scan_response.
*)
type scan_configuration = {
rules: raw_json; (* TODO: can we type this better? *)
(* the fields of triage_ignored are included as-is in this record *)
inherit triage_ignored;
}

(* configuration for scanning version control history,
* e.g., looking back at past git commits for committed credentials which may
* have been removed *)
type historical_configuration = {
(* NOTE(review): presumably whether historical scanning is turned on;
* confirm *)
enabled: bool;
(* NOTE(review): presumably how many days of history to look back at;
* the behavior when the field is absent is not visible here, confirm *)
?lookback_days: int option;
}

(* a glob pattern, kept as a plain string *)
type glob = string

(* Association list pairing a product with a list of globs.
* NOTE(review): presumably the files to ignore for that product; confirm.
*)
type product_ignored_files = (product * glob list) list
(* We omit the usual <json repr="object"> otherwise we get a
* "keys must be strings" error *)
<python repr="dict"> <ts repr="map">

(* Settings for the cli; this is the type of the 'engine_params' field of
* scan_response.
* All the fields declared directly below are marked with '~' (default value
* when absent) or '?' (optional), so none of them is mandatory in the JSON.
*)
type engine_configuration = {
(* the fields of features are included as-is in this record *)
inherit features;
(* NOTE(review): presumably paths or patterns of files to skip; the element
* type is a plain string here, confirm the expected format *)
~ignored_files: string list;
(* from 1.71.0 *)
?product_ignored_files: product_ignored_files option;
(* for features we only want to turn on for select customers *)
~generic_slow_rollout: bool;
(* from 1.63.0 *)
?historical_config: historical_configuration option;
(* from 1.93.
* Indicate that fail-open should always be enabled, overriding the CLI flag.
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
*)
~always_suppress_errors: bool;
}

(* ----------------------------- *)
(* Findings *)
(* ----------------------------- *)
Expand Down Expand Up @@ -1867,7 +1866,8 @@ type core_error <python decorator="dataclass(frozen=True)"> = {
those different files (because ATD does not have a proper module system yet).
*)

(* A string which is wrapped on the OCaml side by the Xlang module. *)
type xlang <ocaml attr="deriving show"> = string wrap <ocaml module="Xlang">
(* A string which is wrapped on the OCaml side by the Analyzer module. *)
type analyzer <ocaml attr="deriving show"> =
string wrap <ocaml module="Analyzer">

(* A target can either be a traditional code target (now with optional
associated lockfile) or it can be a lockfile target, which will be used to
Expand All @@ -1889,10 +1889,10 @@ type target <ocaml attr="deriving show"> = [
*)
type code_target <ocaml attr="deriving show"> = {
path: fpath (* source file *);
(* Must be a valid target analyzer as defined in Xlang.mli.
(* Must be a valid target analyzer as defined in Analyzer.mli.
examples: "ocaml", "python", but also "spacegrep" or "regexp".
*)
analyzer: xlang;
analyzer: analyzer;
products: product list;
?lockfile_target: lockfile option;
}
Expand Down Expand Up @@ -1939,35 +1939,15 @@ type apply_fixes_return <python decorator="dataclass(frozen=True)"> = {
fixed_lines: (int * string list) list;
}

(* TODO: remove many fields so that CallSarifFormat gets close to CallFormatter.
* I think we just need the 'rules: fpath' as it can't be reconstructed
* from cli_output.
*)
type sarif_format_params <python decorator="dataclass(frozen=True)"> = {
(* Path to the rules file *)
type sarif_format <python decorator="dataclass(frozen=True)"> = {
(* Path to the rules file. We need it because rules can't be reconstructed
* from cli_output (which is one of the other params of CallSarifFormat) *)
rules: fpath;

(* TODO: remove, just use cli_output *)
cli_matches: cli_match list;
cli_errors: cli_error list;
(* TODO: remove, should be able to derive those from format_context *)
hide_nudge: bool;
engine_label: string;
(* TODO? move to format_context? *)
is_pro: bool;
show_dataflow_traces: bool;
}

(* Return value of the SARIF formatting call.
* TODO: remove, reuse RetFormatter
*)
type sarif_format_return <python decorator="dataclass(frozen=True)"> = {
(* The formatted output. *)
output: string;
(* Time (in seconds) it took to format the output.
* This helps tracking how much actual formatting time is needed
* compared to the whole RPC call, so we can profile and improve
* performance when needed.
*)
format_time_seconds: float;
}

type output_format
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> =
Expand Down Expand Up @@ -2148,9 +2128,9 @@ type resolution_result = [
type function_call <python decorator="dataclass(frozen=True)"> = [
| CallContributions
| CallApplyFixes of apply_fixes_params
(* TODO: merge with CallFormatter at some point *)
| CallSarifFormat of (format_context * sarif_format_params)
| CallFormatter of (output_format * format_context * cli_output)
(* TODO: merge with CallFormatter at some point *)
| CallSarifFormat of (sarif_format * format_context * cli_output)
(* NOTE: fpath is most likely a temporary file that contains all the rules in
JSON format. In the future, we could send the rules via a big string through
the RPC pipe.
Expand All @@ -2168,9 +2148,9 @@ type function_return <python decorator="dataclass(frozen=True)"> = [
| RetError of string
| RetApplyFixes of apply_fixes_return
| RetContributions of contributions
(* TODO: remove and use RetFormatter for the return of CallSarifFormat too *)
| RetSarifFormat of sarif_format_return
| RetFormatter of string
(* alt: reuse RetFormatter *)
| RetSarifFormat of string
| RetValidate of bool
| RetResolveDependencies of (dependency_source * resolution_result) list
| RetDumpRulePartitions of bool
Expand Down
Loading

0 comments on commit 92c5d5d

Please sign in to comment.