semgrep
diff --git a/‎semgrep_output_v1.atd
Lines changed: 102 additions & 122 deletions b/‎semgrep_output_v1.atd
Lines changed: 102 additions & 122 deletions
@@ -1120,13 +1120,12 @@ type killing_parent = {
  * types used to communicate with the Semgrep backend and are not meant
  * to be consumed directly by Semgrep users or tools wrapping Semgrep.
  *
- * The sequence of HTTP requests is mostly:
- *  - /deployments/current with token
+ * The sequence of HTTP requests for 'semgrep ci' is mostly:
+ *  - /api/agent/deployments/current with token
  *    and response with deployment name in a deployment config
- *  - /deployments/scans when starting a scan, with information about the project
- *    and response with scan_id
- *  - /scans/<scan_id>/config to request the scan config
- *    and response with scan_config including the rules to use
+ *    (TODO? is this request still useful, or can we get rid of it?)
+ *  - /api/cli/scans when starting a scan, with information about the project
+ *    and response with scan_id and scan_config including the rules to use
  *  - /results to send the findings to the backend
  *    and response with errors and task_id
  *  - /complete when done, with the exit code and a few more information
@@ -1186,7 +1185,7 @@ type action = [
 ]
 
 (* ----------------------------- *)
-(* CI Configuration *)
+(* CI configurations *)
 (* ----------------------------- *)
 
 (* Response by the backend to the CLI to the POST deployments/current *)
@@ -1222,21 +1221,16 @@ type ci_config_from_repo = {
 (* ex: "webapp" *)
 type tag = string
 
-(* Response by the backend to the CLI to the POST /scans/<scan_id>/config *)
+(* Response by the backend to the CLI to the POST deployments/scans/config
+ * DEPRECATED? seems to be used only by semgrep lsp.
+ *)
 type scan_config = {
-    (* TODO: remove deployment_id + deployment_name from here, instead this
-     * will be in scan_response.scan_info
-     *)
     deployment_id: int;
     deployment_name: string;
     (* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
     policy_names: string list;
     (* rules raw content in JSON format (but still sent as a string) *)
     rule_config: string;
-    (* since 1.47.0 but not created by the backend (nor used by the CLI) *)
-    ?ci_config_from_cloud: ci_config_from_cloud option;
-
-    (* Deprecated: should rely on ci_config_from_cloud instead *)
     inherit features;
     inherit triage_ignored;
     (* glob patterns *)
@@ -1245,8 +1239,70 @@ type scan_config = {
     ?enabled_products: product list option;
     (* since 1.64.0 *)
     ~actions: action list;
+    (* since 1.47.0 but not created by the backend (nor used by the CLI) *)
+    ?ci_config_from_cloud: ci_config_from_cloud option;
   }
 
+(* Response from the backend to the CLI to the POST /api/cli/scans *)
+type scan_response = {
+    info: scan_info;
+    config: scan_configuration;
+    engine_params: engine_configuration;
+    (* TODO: ~actions: action list; *)
+}
+
+(* meta info about the scan *)
+type scan_info = {
+    ?id: int option; (* the scan id, null for dry-runs *)
+    enabled_products: product list;
+    (* redundant with deployment_config?
+     * TODO? remove the intermediate call to get the deployment and
+     * start a scan to /api/cli/scans/ without first accessing
+     * /api/agent/deployments/current?
+     *)
+    deployment_id: int;
+    deployment_name: string;
+}
+
+(* config specific to the scan, e.g., the rules to run *)
+type scan_configuration = {
+    rules: raw_json; (* can we type this better *)
+    inherit triage_ignored;
+}
+
+(* settings for the cli *)
+type engine_configuration = {
+    inherit features;
+    (* TODO? glob list? fpath list? *)
+    ~ignored_files: string list;
+    (* from 1.71.0 *)
+    ?product_ignored_files: product_ignored_files option;
+    (* for features we only want to turn on for select customers *)
+    ~generic_slow_rollout: bool;
+    (* from 1.63.0 *)
+    ?historical_config: historical_configuration option;
+    (* from 1.93.
+     * Indicate that fail-open should always be enabled, overriding the CLI flag.
+     * coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
+     *)
+    ~always_suppress_errors: bool;
+}
+
+type glob = string
+
+type product_ignored_files = (product * glob list) list
+  (* We omit the usual <json repr="object"> otherwise we get a
+   * "keys must be strings" error *)
+  <python repr="dict"> <ts repr="map">
+
+(* configuration for scanning version control history,
+ * e.g., looking back at past git commits for committed credentials which may
+ * have been removed *)
+type historical_configuration = {
+    enabled: bool;
+    ?lookback_days: int option;
+}
+                 
 (* ----------------------------- *)
 (* CI Deployment response *)
 (* ----------------------------- *)
@@ -1258,13 +1314,27 @@ type deployment_response = {
 (* CI Scan request *)
 (* ----------------------------- *)
 
+(* Sent by the CLI to the POST /api/cli/scans to create a scan. *)
+type scan_request = {
+    (* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
+    project_metadata: project_metadata;
+    scan_metadata: scan_metadata;
+
+    (* added in 1.43 *)
+    ?project_config: ci_config_from_repo option;
+    
+    (* deprecated: made optional in 1.100.0; it has duplicated the
+     * information in project_metadata and scan_metadata since 1.43.0.
+     * old: 'meta: project_metadata;' before 1.43
+     *)
+    ?meta: raw_json option;
+}
+
 (* Collect information about a project from the environment, filesystem,
  * git repo, etc.
  * See also semgrep_metrics.atd and PRIVACY.md
- *
- * TODO:
- * - we could split it in different parts and use inherit to make things clearer
- *   (while still being backward compatible)
+ * TODO: we could split it in different parts and use inherit to make things
+ * clearer (while still being backward compatible)
  *)
 type project_metadata = {
     (* TODO: deprecate in favor of scan_metadata.cli_version *)
@@ -1353,77 +1423,6 @@ type scan_metadata = {
   ?sms_scan_id: string option;
 }
 
-(* Sent by the CLI to the POST /api/cli/scans to create a scan. *)
-type scan_request = {
-    (* added in 1.43 as options, and mandatory since 1.100.0 (replacing meta) *)
-    project_metadata: project_metadata;
-    scan_metadata: scan_metadata;
-
-    (* added in 1.43 *)
-    ?project_config: ci_config_from_repo option;
-    
-    (* deprecated: moved as an option in 1.100.0 and was duplicative of
-     * information in project_metadata and scan_metadata since 1.43.0
-     * old: 'meta: project_metadata;' before 1.43
-     *)
-    ?meta: raw_json option;
-}
-
-(* Response from the backend to the CLI to the POST /api/cli/scans *)
-type scan_response = {
-    info: scan_info;
-    config: scan_configuration;
-    engine_params: engine_configuration;
-}
-
-(* meta info about the scan *)
-type scan_info = {
-    ?id: int option; (* the scan id, null for dry-runs *)
-    enabled_products: product list;
-    deployment_id: int;
-    deployment_name: string;
-}
-
-
-
-(* config specific to the scan, eg *)
-type scan_configuration = {
-    rules: raw_json; (* can we type this better *)
-    inherit triage_ignored;
-}
-
-(* configuration for scanning version control history,
- * e.g., looking back at past git commits for committed credentials which may
- * have been removed *)
-type historical_configuration = {
-    enabled: bool;
-    ?lookback_days: int option;
-}
-
-type glob = string
-
-type product_ignored_files = (product * glob list) list
-  (* We omit the usual <json repr="object"> otherwise we get a
-   * "keys must be strings" error *)
-  <python repr="dict"> <ts repr="map">
-
-(* settings for the cli *)
-type engine_configuration = {
-    inherit features;
-    ~ignored_files: string list;
-    (* from 1.71.0 *)
-    ?product_ignored_files: product_ignored_files option;
-    (* for features we only want to turn on for select customers *)
-    ~generic_slow_rollout: bool;
-    (* from 1.63.0 *)
-    ?historical_config: historical_configuration option;
-    (* from 1.93.
-     * Indicate that fail-open should always be enabled, overriding the CLI flag.
-     * coupling: server/semgrep_app/saas/models/deployment_products_mixin.py
-     *)
-    ~always_suppress_errors: bool;
-}
-
 (* ----------------------------- *)
 (* Findings *)
 (* ----------------------------- *)
@@ -1867,7 +1866,8 @@ type core_error <python decorator="dataclass(frozen=True)"> = {
    those different files (because ATD does not have a proper module system yet).
 *)
 
-type xlang <ocaml attr="deriving show"> = string wrap <ocaml module="Xlang">
+type analyzer <ocaml attr="deriving show"> =
+  string wrap <ocaml module="Analyzer">
 
 (* A target can either be a traditional code target (now with optional
    associated lockfile) or it can be a lockfile target, which will be used to
@@ -1889,10 +1889,10 @@ type target <ocaml attr="deriving show"> = [
 *)
 type code_target <ocaml attr="deriving show"> = {
   path: fpath (* source file *);
-  (* Must be a valid target analyzer as defined in Xlang.mli.
+  (* Must be a valid target analyzer as defined in Analyzer.mli.
      examples: "ocaml", "python", but also "spacegrep" or "regexp".
   *)
-  analyzer: xlang;
+  analyzer: analyzer;
   products: product list;
   ?lockfile_target: lockfile option;
 }
@@ -1939,35 +1939,15 @@ type apply_fixes_return <python decorator="dataclass(frozen=True)"> = {
   fixed_lines: (int * string list) list;
 }
 
-(* TODO: remove many fields so that CallSarifFormat gets close to CallFormatter.
- * I think we just need the 'rules: fpath' as it can't be reconstructed
- * from cli_output.
- *)
-type sarif_format_params <python decorator="dataclass(frozen=True)"> = {
-  (* Path the the rules file *)
+type sarif_format <python decorator="dataclass(frozen=True)"> = {
+  (* Path to the rules file. We need it because rules can't be reconstructed
+   * from cli_output (which is one of the other params of CallSarifFormat) *)
   rules: fpath;
-
-  (* TODO: remove, just use cli_output *)
-  cli_matches: cli_match list;
-  cli_errors: cli_error list;
-  (* TODO: remove, should be able to derive those from format_context *)
-  hide_nudge: bool;
-  engine_label: string;
+  (* TODO? move to format_context? *)
+  is_pro: bool;
   show_dataflow_traces: bool;
 }
 
-(* TODO: remove, reuse RetFormatter *)
-type sarif_format_return <python decorator="dataclass(frozen=True)"> = {
-  (* The formatted output. *)
-  output: string;
-  (* Time (in seconds) it took to format the output.
-   * This helps tracking how much actual formatting time is needed
-   * compared to the whole RPC call, so we can profile and improve
-   * performace when needed.
-   *)
-  format_time_seconds: float;
-}
-
 type output_format
     <ocaml attr="deriving show">
     <python decorator="dataclass(frozen=True)"> =
@@ -2148,9 +2128,9 @@ type resolution_result = [
 type function_call <python decorator="dataclass(frozen=True)"> = [
   | CallContributions
   | CallApplyFixes of apply_fixes_params
-  (* TODO: merge with CallFormatter at some point *)
-  | CallSarifFormat of (format_context * sarif_format_params)
   | CallFormatter of (output_format * format_context * cli_output)
+  (* TODO: merge with CallFormatter at some point *)
+  | CallSarifFormat of (sarif_format * format_context * cli_output)
   (* NOTE: fpath is most likely a temporary file that contains all the rules in
      JSON format. In the future, we could send the rules via a big string through
      the RPC pipe.
@@ -2168,9 +2148,9 @@ type function_return <python decorator="dataclass(frozen=True)"> = [
   | RetError of string
   | RetApplyFixes of apply_fixes_return
   | RetContributions of contributions
-  (* TODO: remove and use RetFormatter for the return of CallSarifFormat too *)
-  | RetSarifFormat of sarif_format_return
   | RetFormatter of string
+  (* alt: reuse RetFormatter *)
+  | RetSarifFormat of string
   | RetValidate of bool
   | RetResolveDependencies of (dependency_source * resolution_result) list
   | RetDumpRulePartitions of bool