Skip to content

Commit

Permalink
cleanup semgrep ci config section (#334)
Browse files Browse the repository at this point in the history
test plan:
make


- [x] I ran `make setup && make` to update the generated code after
editing a `.atd` file (TODO: have a CI check)
- [x] I made sure we're still backward compatible with old versions of
the CLI.
For example, the Semgrep backend still needs to be able to *consume* data
	  generated by Semgrep 1.50.0.
See
https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
	  Note that the types related to the semgrep-core JSON output or the
	  semgrep-core RPC do not need to be backward compatible!

Co-authored-by: pad <[email protected]>
  • Loading branch information
aryx and aryx authored Jan 13, 2025
1 parent 48eb841 commit aec607b
Show file tree
Hide file tree
Showing 7 changed files with 477 additions and 478 deletions.
179 changes: 89 additions & 90 deletions semgrep_output_v1.atd
Original file line number Diff line number Diff line change
Expand Up @@ -1120,13 +1120,12 @@ type killing_parent = {
* types used to communicate with the Semgrep backend and are not meant
* to be consumed directly by Semgrep users or tools wrapping Semgrep.
*
* The sequence of HTTP requests is mostly:
* - /deployments/current with token
* The sequence of HTTP requests for 'semgrep ci' is mostly:
* - /api/agent/deployments/current with token
* and response with deployment name in a deployment config
* - /deployments/scans when starting a scan, with information about the project
* and response with scan_id
* - /scans/<scan_id>/config to request the scan config
* and response with scan_config including the rules to use
* (TODO? get rid of this one? useful?)
* - /api/cli/scans when starting a scan, with information about the project
* and response with scan_id and scan_config including the rules to use
* - /results to send the findings to the backend
* and response with errors and task_id
* - /complete when done, with the exit code and some additional information
Expand Down Expand Up @@ -1186,7 +1185,7 @@ type action = [
]

(* ----------------------------- *)
(* CI Configuration *)
(* CI configurations *)
(* ----------------------------- *)

(* Response by the backend to the CLI to the POST deployments/current *)
Expand Down Expand Up @@ -1222,21 +1221,16 @@ type ci_config_from_repo = {
(* ex: "webapp" *)
type tag = string

(* Response by the backend to the CLI to the POST /scans/<scan_id>/config *)
(* Response by the backend to the CLI to the POST deployments/scans/config
* DEPRECATED? seems to be used only by semgrep lsp.
*)
type scan_config = {
(* TODO: remove deployment_id + deployment_name from here, instead this
* will be in scan_response.scan_info
*)
deployment_id: int;
deployment_name: string;
(* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
policy_names: string list;
(* rules raw content in JSON format (but still sent as a string) *)
rule_config: string;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;

(* Deprecated: should rely on ci_config_from_cloud instead *)
inherit features;
inherit triage_ignored;
(* glob patterns *)
Expand All @@ -1245,8 +1239,70 @@ type scan_config = {
?enabled_products: product list option;
(* since 1.64.0 *)
~actions: action list;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans.
* It groups three records: meta information about the scan, the
* scan-specific configuration, and the settings for the CLI engine.
*)
type scan_response = {
(* meta info about the scan (scan id, enabled products, deployment) *)
info: scan_info;
(* the rules to use plus the fields inherited from triage_ignored *)
config: scan_configuration;
(* settings for the cli *)
engine_params: engine_configuration;
(* TODO: ~actions: action list; *)
}

(* Meta info about the scan; this is the info part of scan_response. *)
type scan_info = {
?id: int option; (* the scan id, null for dry-runs *)
(* products enabled for this scan; mandatory here, whereas the field of
* the same name in scan_config is optional
*)
enabled_products: product list;
(* redundant with deployment_config?
* TODO? remove the intermediate call to get the deployment and
* start a scan to /api/cli/scans/ without first accessing
* api/agent/deployments/current?
*)
deployment_id: int;
deployment_name: string;
}

(* Config specific to the scan, e.g., the rules to use.
* This is the config part of scan_response.
*)
type scan_configuration = {
rules: raw_json; (* TODO: can we type this better? *)
(* includes the fields of the triage_ignored record *)
inherit triage_ignored;
}

(* Settings for the cli; this is the engine_params part of scan_response.
* Fields prefixed with ~ are optional in the JSON and take the default
* value of their type when absent; fields prefixed with ? are optional
* and decoded as an option.
*)
type engine_configuration = {
(* includes the fields of the features record *)
inherit features;
(* TODO? glob list? fpath list? *)
~ignored_files: string list;
(* from 1.71.0 *)
?product_ignored_files: product_ignored_files option;
(* for features we only want to turn on for select customers *)
~generic_slow_rollout: bool;
(* from 1.63.0 *)
?historical_config: historical_configuration option;
(* from 1.93.
* Indicate that fail-open should always be enabled, overriding the CLI flag.
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
*)
~always_suppress_errors: bool;
}

(* a glob pattern, kept as a plain string *)
type glob = string

(* Association between a product and a list of glob patterns.
* NOTE(review): presumably the files to ignore for that product, as the
* name suggests; confirm against the backend.
*)
type product_ignored_files = (product * glob list) list
(* We omit the usual <json repr="object"> otherwise we get a
* "keys must be strings" error *)
<python repr="dict"> <ts repr="map">

(* configuration for scanning version control history,
* e.g., looking back at past git commits for committed credentials which may
* have been removed *)
type historical_configuration = {
(* NOTE(review): presumably turns historical scanning on or off; confirm *)
enabled: bool;
(* optional; NOTE(review): presumably how many days of history to look
* back at, confirm the behavior when the field is absent
*)
?lookback_days: int option;
}

(* ----------------------------- *)
(* CI Deployment response *)
(* ----------------------------- *)
Expand All @@ -1258,13 +1314,27 @@ type deployment_response = {
(* CI Scan request *)
(* ----------------------------- *)

(* Sent by the CLI to the POST /api/cli/scans to create a scan.
* The backend answers this request with a scan_response.
*)
type scan_request = {
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
project_metadata: project_metadata;
scan_metadata: scan_metadata;

(* added in 1.43; optional, so it may be absent from the JSON *)
?project_config: ci_config_from_repo option;

(* deprecated: moved as an option in 1.100.0 and was duplicative of
* information in project_metadata and scan_metadata since 1.43.0
* old: 'meta: project_metadata;' before 1.43
*)
?meta: raw_json option;
}

(* Collect information about a project from the environment, filesystem,
* git repo, etc.
* See also semgrep_metrics.atd and PRIVACY.md
*
* TODO:
* - we could split it in different parts and use inherit to make things clearer
* (while still being backward compatible)
* TODO: we could split it in different parts and use inherit to make things
* clearer (while still being backward compatible)
*)
type project_metadata = {
(* TODO: deprecate in favor of scan_metadata.cli_version *)
Expand Down Expand Up @@ -1353,77 +1423,6 @@ type scan_metadata = {
?sms_scan_id: string option;
}

(* Sent by the CLI to the POST /api/cli/scans to create a scan.
* The backend answers this request with a scan_response.
*)
type scan_request = {
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
project_metadata: project_metadata;
scan_metadata: scan_metadata;

(* added in 1.43; optional, so it may be absent from the JSON *)
?project_config: ci_config_from_repo option;

(* deprecated: moved as an option in 1.100.0 and was duplicative of
* information in project_metadata and scan_metadata since 1.43.0
* old: 'meta: project_metadata;' before 1.43
*)
?meta: raw_json option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans.
* It groups three records: meta information about the scan, the
* scan-specific configuration, and the settings for the CLI engine.
*)
type scan_response = {
(* meta info about the scan (scan id, enabled products, deployment) *)
info: scan_info;
(* the rules to use plus the fields inherited from triage_ignored *)
config: scan_configuration;
(* settings for the cli *)
engine_params: engine_configuration;
}

(* Meta info about the scan; this is the info part of scan_response. *)
type scan_info = {
?id: int option; (* the scan id, null for dry-runs *)
(* products enabled for this scan *)
enabled_products: product list;
(* the deployment the scan belongs to, by id and by name *)
deployment_id: int;
deployment_name: string;
}



(* Config specific to the scan, e.g., the rules to use.
* This is the config part of scan_response.
*)
type scan_configuration = {
rules: raw_json; (* TODO: can we type this better? *)
(* includes the fields of the triage_ignored record *)
inherit triage_ignored;
}

(* configuration for scanning version control history,
* e.g., looking back at past git commits for committed credentials which may
* have been removed *)
type historical_configuration = {
(* NOTE(review): presumably turns historical scanning on or off; confirm *)
enabled: bool;
(* optional; NOTE(review): presumably how many days of history to look
* back at, confirm the behavior when the field is absent
*)
?lookback_days: int option;
}

(* a glob pattern, kept as a plain string *)
type glob = string

(* Association between a product and a list of glob patterns.
* NOTE(review): presumably the files to ignore for that product, as the
* name suggests; confirm against the backend.
*)
type product_ignored_files = (product * glob list) list
(* We omit the usual <json repr="object"> otherwise we get a
* "keys must be strings" error *)
<python repr="dict"> <ts repr="map">

(* Settings for the cli; this is the engine_params part of scan_response.
* Fields prefixed with ~ are optional in the JSON and take the default
* value of their type when absent; fields prefixed with ? are optional
* and decoded as an option.
*)
type engine_configuration = {
(* includes the fields of the features record *)
inherit features;
~ignored_files: string list;
(* from 1.71.0 *)
?product_ignored_files: product_ignored_files option;
(* for features we only want to turn on for select customers *)
~generic_slow_rollout: bool;
(* from 1.63.0 *)
?historical_config: historical_configuration option;
(* from 1.93.
* Indicate that fail-open should always be enabled, overriding the CLI flag.
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
*)
~always_suppress_errors: bool;
}

(* ----------------------------- *)
(* Findings *)
(* ----------------------------- *)
Expand Down
Loading

0 comments on commit aec607b

Please sign in to comment.