Skip to content

Commit 92c5d5d

Browse files
authored
Merge branch 'main' into brandon/import-from-id-info
2 parents f9586f8 + b54e43b commit 92c5d5d

7 files changed

+8813
-9231
lines changed

semgrep_output_v1.atd

Lines changed: 102 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,13 +1120,12 @@ type killing_parent = {
11201120
* types used to communicate with the Semgrep backend and are not meant
11211121
* to be consumed directly by Semgrep users or tools wrapping Semgrep.
11221122
*
1123-
* The sequence of HTTP requests is mostly:
1124-
* - /deployments/current with token
1123+
* The sequence of HTTP requests for 'semgrep ci' is mostly:
1124+
* - /api/agent/deployments/current with token
11251125
* and response with deployment name in a deployment config
1126-
* - /deployments/scans when starting a scan, with information about the project
1127-
* and response with scan_id
1128-
* - /scans/<scan_id>/config to request the scan config
1129-
* and response with scan_config including the rules to use
1126+
* (TODO? get rid of this one? useful?)
1127+
* - /api/cli/scans when starting a scan, with information about the project
1128+
* and response with scan_id and scan_config including the rules to use
11301129
* - /results to send the findings to the backend
11311130
* and response with errors and task_id
11321131
* - /complete when done, with the exit code and some additional information
@@ -1186,7 +1185,7 @@ type action = [
11861185
]
11871186

11881187
(* ----------------------------- *)
1189-
(* CI Configuration *)
1188+
(* CI configurations *)
11901189
(* ----------------------------- *)
11911190

11921191
(* Response by the backend to the CLI to the POST deployments/current *)
@@ -1222,21 +1221,16 @@ type ci_config_from_repo = {
12221221
(* ex: "webapp" *)
12231222
type tag = string
12241223

1225-
(* Response by the backend to the CLI to the POST /scans/<scan_id>/config *)
1224+
(* Response by the backend to the CLI to the POST deployments/scans/config
1225+
* DEPRECATED? seems to be used only by semgrep lsp.
1226+
*)
12261227
type scan_config = {
1227-
(* TODO: remove deployment_id + deployment_name from here, instead this
1228-
* will be in scan_response.scan_info
1229-
*)
12301228
deployment_id: int;
12311229
deployment_name: string;
12321230
(* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
12331231
policy_names: string list;
12341232
(* rules raw content in JSON format (but still sent as a string) *)
12351233
rule_config: string;
1236-
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
1237-
?ci_config_from_cloud: ci_config_from_cloud option;
1238-
1239-
(* Deprecated: should rely on ci_config_from_cloud instead *)
12401234
inherit features;
12411235
inherit triage_ignored;
12421236
(* glob patterns *)
@@ -1245,8 +1239,70 @@ type scan_config = {
12451239
?enabled_products: product list option;
12461240
(* since 1.64.0 *)
12471241
~actions: action list;
1242+
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
1243+
?ci_config_from_cloud: ci_config_from_cloud option;
12481244
}
12491245

1246+
(* Response from the backend to the CLI to the POST /api/cli/scans *)
1247+
type scan_response = {
1248+
info: scan_info;
1249+
config: scan_configuration;
1250+
engine_params: engine_configuration;
1251+
(* TODO: ~actions: action list; *)
1252+
}
1253+
1254+
(* meta info about the scan *)
1255+
type scan_info = {
1256+
?id: int option; (* the scan id, null for dry-runs *)
1257+
enabled_products: product list;
1258+
(* redundant with deployment_config?
1259+
* TODO? remove the intermediate call to get the deployment and
1260+
* start a scan to /api/cli/scans/ without first accessing
1261+
* api/agent/deployments/current?
1262+
*)
1263+
deployment_id: int;
1264+
deployment_name: string;
1265+
}
1266+
1267+
(* config specific to the scan, e.g., the rules to use *)
1268+
type scan_configuration = {
1269+
rules: raw_json; (* can we type this better *)
1270+
inherit triage_ignored;
1271+
}
1272+
1273+
(* settings for the cli *)
1274+
type engine_configuration = {
1275+
inherit features;
1276+
(* TODO? glob list? fpath list? *)
1277+
~ignored_files: string list;
1278+
(* from 1.71.0 *)
1279+
?product_ignored_files: product_ignored_files option;
1280+
(* for features we only want to turn on for select customers *)
1281+
~generic_slow_rollout: bool;
1282+
(* from 1.63.0 *)
1283+
?historical_config: historical_configuration option;
1284+
(* from 1.93.
1285+
* Indicate that fail-open should always be enabled, overriding the CLI flag.
1286+
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
1287+
*)
1288+
~always_suppress_errors: bool;
1289+
}
1290+
1291+
type glob = string
1292+
1293+
type product_ignored_files = (product * glob list) list
1294+
(* We omit the usual <json repr="object"> otherwise we get a
1295+
* "keys must be strings" error *)
1296+
<python repr="dict"> <ts repr="map">
1297+
1298+
(* configuration for scanning version control history,
1299+
* e.g., looking back at past git commits for committed credentials which may
1300+
* have been removed *)
1301+
type historical_configuration = {
1302+
enabled: bool;
1303+
?lookback_days: int option;
1304+
}
1305+
12501306
(* ----------------------------- *)
12511307
(* CI Deployment response *)
12521308
(* ----------------------------- *)
@@ -1258,13 +1314,27 @@ type deployment_response = {
12581314
(* CI Scan request *)
12591315
(* ----------------------------- *)
12601316

1317+
(* Sent by the CLI to the POST /api/cli/scans to create a scan. *)
1318+
type scan_request = {
1319+
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
1320+
project_metadata: project_metadata;
1321+
scan_metadata: scan_metadata;
1322+
1323+
(* added in 1.43 *)
1324+
?project_config: ci_config_from_repo option;
1325+
1326+
(* deprecated: moved as an option in 1.100.0 and was duplicative of
1327+
* information in project_metadata and scan_metadata since 1.43.0
1328+
* old: 'meta: project_metadata;' before 1.43
1329+
*)
1330+
?meta: raw_json option;
1331+
}
1332+
12611333
(* Collect information about a project from the environment, filesystem,
12621334
* git repo, etc.
12631335
* See also semgrep_metrics.atd and PRIVACY.md
1264-
*
1265-
* TODO:
1266-
* - we could split it in different parts and use inherit to make things clearer
1267-
* (while still being backward compatible)
1336+
* TODO: we could split it in different parts and use inherit to make things
1337+
* clearer (while still being backward compatible)
12681338
*)
12691339
type project_metadata = {
12701340
(* TODO: deprecate in favor of scan_metadata.cli_version *)
@@ -1353,77 +1423,6 @@ type scan_metadata = {
13531423
?sms_scan_id: string option;
13541424
}
13551425

1356-
(* Sent by the CLI to the POST /api/cli/scans to create a scan. *)
1357-
type scan_request = {
1358-
(* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
1359-
project_metadata: project_metadata;
1360-
scan_metadata: scan_metadata;
1361-
1362-
(* added in 1.43 *)
1363-
?project_config: ci_config_from_repo option;
1364-
1365-
(* deprecated: moved as an option in 1.100.0 and was duplicative of
1366-
* information in project_metadata and scan_metadata since 1.43.0
1367-
* old: 'meta: project_metadata;' before 1.43
1368-
*)
1369-
?meta: raw_json option;
1370-
}
1371-
1372-
(* Response from the backend to the CLI to the POST /api/cli/scans *)
1373-
type scan_response = {
1374-
info: scan_info;
1375-
config: scan_configuration;
1376-
engine_params: engine_configuration;
1377-
}
1378-
1379-
(* meta info about the scan *)
1380-
type scan_info = {
1381-
?id: int option; (* the scan id, null for dry-runs *)
1382-
enabled_products: product list;
1383-
deployment_id: int;
1384-
deployment_name: string;
1385-
}
1386-
1387-
1388-
1389-
(* config specific to the scan, e.g., the rules to use *)
1390-
type scan_configuration = {
1391-
rules: raw_json; (* can we type this better *)
1392-
inherit triage_ignored;
1393-
}
1394-
1395-
(* configuration for scanning version control history,
1396-
* e.g., looking back at past git commits for committed credentials which may
1397-
* have been removed *)
1398-
type historical_configuration = {
1399-
enabled: bool;
1400-
?lookback_days: int option;
1401-
}
1402-
1403-
type glob = string
1404-
1405-
type product_ignored_files = (product * glob list) list
1406-
(* We omit the usual <json repr="object"> otherwise we get a
1407-
* "keys must be strings" error *)
1408-
<python repr="dict"> <ts repr="map">
1409-
1410-
(* settings for the cli *)
1411-
type engine_configuration = {
1412-
inherit features;
1413-
~ignored_files: string list;
1414-
(* from 1.71.0 *)
1415-
?product_ignored_files: product_ignored_files option;
1416-
(* for features we only want to turn on for select customers *)
1417-
~generic_slow_rollout: bool;
1418-
(* from 1.63.0 *)
1419-
?historical_config: historical_configuration option;
1420-
(* from 1.93.
1421-
* Indicate that fail-open should always be enabled, overriding the CLI flag.
1422-
* coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
1423-
*)
1424-
~always_suppress_errors: bool;
1425-
}
1426-
14271426
(* ----------------------------- *)
14281427
(* Findings *)
14291428
(* ----------------------------- *)
@@ -1867,7 +1866,8 @@ type core_error <python decorator="dataclass(frozen=True)"> = {
18671866
those different files (because ATD does not have a proper module system yet).
18681867
*)
18691868

1870-
type xlang <ocaml attr="deriving show"> = string wrap <ocaml module="Xlang">
1869+
type analyzer <ocaml attr="deriving show"> =
1870+
string wrap <ocaml module="Analyzer">
18711871

18721872
(* A target can either be a traditional code target (now with optional
18731873
associated lockfile) or it can be a lockfile target, which will be used to
@@ -1889,10 +1889,10 @@ type target <ocaml attr="deriving show"> = [
18891889
*)
18901890
type code_target <ocaml attr="deriving show"> = {
18911891
path: fpath (* source file *);
1892-
(* Must be a valid target analyzer as defined in Xlang.mli.
1892+
(* Must be a valid target analyzer as defined in Analyzer.mli.
18931893
examples: "ocaml", "python", but also "spacegrep" or "regexp".
18941894
*)
1895-
analyzer: xlang;
1895+
analyzer: analyzer;
18961896
products: product list;
18971897
?lockfile_target: lockfile option;
18981898
}
@@ -1939,35 +1939,15 @@ type apply_fixes_return <python decorator="dataclass(frozen=True)"> = {
19391939
fixed_lines: (int * string list) list;
19401940
}
19411941

1942-
(* TODO: remove many fields so that CallSarifFormat gets close to CallFormatter.
1943-
* I think we just need the 'rules: fpath' as it can't be reconstructed
1944-
* from cli_output.
1945-
*)
1946-
type sarif_format_params <python decorator="dataclass(frozen=True)"> = {
1947-
(* Path to the rules file *)
1942+
type sarif_format <python decorator="dataclass(frozen=True)"> = {
1943+
(* Path to the rules file. We need it because rules can't be reconstructed
1944+
* from cli_output (which is one of the other params of CallSarifFormat) *)
19481945
rules: fpath;
1949-
1950-
(* TODO: remove, just use cli_output *)
1951-
cli_matches: cli_match list;
1952-
cli_errors: cli_error list;
1953-
(* TODO: remove, should be able to derive those from format_context *)
1954-
hide_nudge: bool;
1955-
engine_label: string;
1946+
(* TODO? move to format_context? *)
1947+
is_pro: bool;
19561948
show_dataflow_traces: bool;
19571949
}
19581950

1959-
(* TODO: remove, reuse RetFormatter *)
1960-
type sarif_format_return <python decorator="dataclass(frozen=True)"> = {
1961-
(* The formatted output. *)
1962-
output: string;
1963-
(* Time (in seconds) it took to format the output.
1964-
* This helps tracking how much actual formatting time is needed
1965-
* compared to the whole RPC call, so we can profile and improve
1966-
* performance when needed.
1967-
*)
1968-
format_time_seconds: float;
1969-
}
1970-
19711951
type output_format
19721952
<ocaml attr="deriving show">
19731953
<python decorator="dataclass(frozen=True)"> =
@@ -2148,9 +2128,9 @@ type resolution_result = [
21482128
type function_call <python decorator="dataclass(frozen=True)"> = [
21492129
| CallContributions
21502130
| CallApplyFixes of apply_fixes_params
2151-
(* TODO: merge with CallFormatter at some point *)
2152-
| CallSarifFormat of (format_context * sarif_format_params)
21532131
| CallFormatter of (output_format * format_context * cli_output)
2132+
(* TODO: merge with CallFormatter at some point *)
2133+
| CallSarifFormat of (sarif_format * format_context * cli_output)
21542134
(* NOTE: fpath is most likely a temporary file that contains all the rules in
21552135
JSON format. In the future, we could send the rules via a big string through
21562136
the RPC pipe.
@@ -2168,9 +2148,9 @@ type function_return <python decorator="dataclass(frozen=True)"> = [
21682148
| RetError of string
21692149
| RetApplyFixes of apply_fixes_return
21702150
| RetContributions of contributions
2171-
(* TODO: remove and use RetFormatter for the return of CallSarifFormat too *)
2172-
| RetSarifFormat of sarif_format_return
21732151
| RetFormatter of string
2152+
(* alt: reuse RetFormatter *)
2153+
| RetSarifFormat of string
21742154
| RetValidate of bool
21752155
| RetResolveDependencies of (dependency_source * resolution_result) list
21762156
| RetDumpRulePartitions of bool

0 commit comments

Comments
 (0)