From 4156333ba1f5227e6b1f4b391a6d23764c0db8e9 Mon Sep 17 00:00:00 2001 From: Clara McCreery Date: Fri, 1 Nov 2024 13:23:10 -0700 Subject: [PATCH] feat(sms): add scm_run_id to scan-create interface (#306) For SMS, we would like to be able to connect a particular Scan object to the SCM (source control manager) run_id that it is associated with. Details [here](https://www.notion.so/semgrep/SMS-Supportability-1273009241a880579e9ae90928391a0e?pvs=4#1313009241a880d79d77eb0d6773ad56). If we create the SCM run before the scan exists (desirable because there are actually many steps between us receiving a webhook and a Scan object getting created, leading to a significant time delay), then when we create the Scan, we need to know what the SCM run id is in order to connect it to the Scan and update it as the Scan progresses. This interface change allows us to pass the SCM run id from the CLI to the backend. Changes in `semgrep` to follow this PR will actually set this value based on an environment variable and then send it to the app. - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend still needs to be able to *consume* data generated by Semgrep 1.17.0. 
See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades --- semgrep_output_v1.atd | 3 +++ semgrep_output_v1.jsonschema | 3 ++- semgrep_output_v1.proto | 3 ++- semgrep_output_v1.py | 4 +++ semgrep_output_v1.ts | 3 +++ semgrep_output_v1_j.ml | 52 +++++++++++++++++++++++++++++++++++- semgrep_output_v1_j.mli | 3 ++- 7 files changed, 67 insertions(+), 4 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index c4cf44d7..8a928a84 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -1422,6 +1422,9 @@ type scan_metadata = { unique_id: uuid; (* client generated uuid for the scan *) requested_products: product list; ~dry_run: bool; (* from 1.47.0 *) + (* since 1.96.0 *) + (* unique id associated with the scan run in the source control manager (e.g. github or gitlab) *) + ?scm_run_id: string option; } (* Sent by the CLI to the POST /api/cli/scans to create a scan. *) diff --git a/semgrep_output_v1.jsonschema b/semgrep_output_v1.jsonschema index 02bae7e8..7dd8b9e1 100644 --- a/semgrep_output_v1.jsonschema +++ b/semgrep_output_v1.jsonschema @@ -1118,7 +1118,8 @@ "type": "array", "items": { "$ref": "#/definitions/product" } }, - "dry_run": { "type": "boolean" } + "dry_run": { "type": "boolean" }, + "scm_run_id": { "type": "string" } } }, "scan_request": { diff --git a/semgrep_output_v1.proto b/semgrep_output_v1.proto index d81398cc..5c90e54f 100644 --- a/semgrep_output_v1.proto +++ b/semgrep_output_v1.proto @@ -1,6 +1,6 @@ // Generated by jsonschema2protobuf. DO NOT EDIT! 
// Source file: semgrep_output_v1.jsonschema -// Source file sha256 digest: 2a59d8d32cec5bbb58993fa0764b2b8738f599a82d3ba653d9d47724bda07dba +// Source file sha256 digest: e7d417445753ed30065525d1aeefbb3c0f43c310a1bb493cb4125bdddf6a6ef5 syntax = "proto3"; @@ -431,6 +431,7 @@ message ScanMetadata { string unique_id = 445148607; repeated google.protobuf.Any requested_products = 6989766; bool dry_run = 151515763; + string scm_run_id = 312927713; } message ScanRequest { diff --git a/semgrep_output_v1.py b/semgrep_output_v1.py index e4f26846..07b5d776 100644 --- a/semgrep_output_v1.py +++ b/semgrep_output_v1.py @@ -3300,6 +3300,7 @@ class ScanMetadata: unique_id: Uuid requested_products: List[Product] dry_run: bool = field(default_factory=lambda: False) + scm_run_id: Optional[str] = None @classmethod def from_json(cls, x: Any) -> 'ScanMetadata': @@ -3309,6 +3310,7 @@ def from_json(cls, x: Any) -> 'ScanMetadata': unique_id=Uuid.from_json(x['unique_id']) if 'unique_id' in x else _atd_missing_json_field('ScanMetadata', 'unique_id'), requested_products=_atd_read_list(Product.from_json)(x['requested_products']) if 'requested_products' in x else _atd_missing_json_field('ScanMetadata', 'requested_products'), dry_run=_atd_read_bool(x['dry_run']) if 'dry_run' in x else False, + scm_run_id=_atd_read_string(x['scm_run_id']) if 'scm_run_id' in x else None, ) else: _atd_bad_json('ScanMetadata', x) @@ -3319,6 +3321,8 @@ def to_json(self) -> Any: res['unique_id'] = (lambda x: x.to_json())(self.unique_id) res['requested_products'] = _atd_write_list((lambda x: x.to_json()))(self.requested_products) res['dry_run'] = _atd_write_bool(self.dry_run) + if self.scm_run_id is not None: + res['scm_run_id'] = _atd_write_string(self.scm_run_id) return res @classmethod diff --git a/semgrep_output_v1.ts b/semgrep_output_v1.ts index 1e329c7f..a7054a11 100644 --- a/semgrep_output_v1.ts +++ b/semgrep_output_v1.ts @@ -628,6 +628,7 @@ export type ScanMetadata = { unique_id: Uuid; requested_products: 
Product[]; dry_run: boolean; + scm_run_id?: string; } export type ScanRequest = { @@ -2852,6 +2853,7 @@ export function writeScanMetadata(x: ScanMetadata, context: any = x): any { 'unique_id': _atd_write_required_field('ScanMetadata', 'unique_id', writeUuid, x.unique_id, x), 'requested_products': _atd_write_required_field('ScanMetadata', 'requested_products', _atd_write_array(writeProduct), x.requested_products, x), 'dry_run': _atd_write_field_with_default(_atd_write_bool, false, x.dry_run, x), + 'scm_run_id': _atd_write_optional_field(_atd_write_string, x.scm_run_id, x), }; } @@ -2861,6 +2863,7 @@ export function readScanMetadata(x: any, context: any = x): ScanMetadata { unique_id: _atd_read_required_field('ScanMetadata', 'unique_id', readUuid, x['unique_id'], x), requested_products: _atd_read_required_field('ScanMetadata', 'requested_products', _atd_read_array(readProduct), x['requested_products'], x), dry_run: _atd_read_field_with_default(_atd_read_bool, false, x['dry_run'], x), + scm_run_id: _atd_read_optional_field(_atd_read_string, x['scm_run_id'], x), }; } diff --git a/semgrep_output_v1_j.ml b/semgrep_output_v1_j.ml index 47e7fb13..43af8474 100644 --- a/semgrep_output_v1_j.ml +++ b/semgrep_output_v1_j.ml @@ -302,7 +302,8 @@ type scan_metadata = Semgrep_output_v1_t.scan_metadata = { cli_version: version; unique_id: uuid; requested_products: product list; - dry_run: bool + dry_run: bool; + scm_run_id: string option } type project_metadata = Semgrep_output_v1_t.project_metadata = { @@ -11091,6 +11092,17 @@ let write_scan_metadata : _ -> scan_metadata -> _ = ( Yojson.Safe.write_bool ) ob x.dry_run; + (match x.scm_run_id with None -> () | Some x -> + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"scm_run_id\":"; + ( + Yojson.Safe.write_string + ) + ob x; + ); Buffer.add_char ob '}'; ) let string_of_scan_metadata ?(len = 1024) x = @@ -11105,6 +11117,7 @@ let read_scan_metadata = ( let field_unique_id = ref (None) 
in let field_requested_products = ref (None) in let field_dry_run = ref (false) in + let field_scm_run_id = ref (None) in try Yojson.Safe.read_space p lb; Yojson.Safe.read_object_end lb; @@ -11130,6 +11143,14 @@ let read_scan_metadata = ( -1 ) ) + | 10 -> ( + if String.unsafe_get s pos = 's' && String.unsafe_get s (pos+1) = 'c' && String.unsafe_get s (pos+2) = 'm' && String.unsafe_get s (pos+3) = '_' && String.unsafe_get s (pos+4) = 'r' && String.unsafe_get s (pos+5) = 'u' && String.unsafe_get s (pos+6) = 'n' && String.unsafe_get s (pos+7) = '_' && String.unsafe_get s (pos+8) = 'i' && String.unsafe_get s (pos+9) = 'd' then ( + 4 + ) + else ( + -1 + ) + ) | 11 -> ( if String.unsafe_get s pos = 'c' && String.unsafe_get s (pos+1) = 'l' && String.unsafe_get s (pos+2) = 'i' && String.unsafe_get s (pos+3) = '_' && String.unsafe_get s (pos+4) = 'v' && String.unsafe_get s (pos+5) = 'e' && String.unsafe_get s (pos+6) = 'r' && String.unsafe_get s (pos+7) = 's' && String.unsafe_get s (pos+8) = 'i' && String.unsafe_get s (pos+9) = 'o' && String.unsafe_get s (pos+10) = 'n' then ( 0 @@ -11186,6 +11207,16 @@ let read_scan_metadata = ( ) p lb ); ) + | 4 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_scm_run_id := ( + Some ( + ( + Atdgen_runtime.Oj_run.read_string + ) p lb + ) + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -11215,6 +11246,14 @@ let read_scan_metadata = ( -1 ) ) + | 10 -> ( + if String.unsafe_get s pos = 's' && String.unsafe_get s (pos+1) = 'c' && String.unsafe_get s (pos+2) = 'm' && String.unsafe_get s (pos+3) = '_' && String.unsafe_get s (pos+4) = 'r' && String.unsafe_get s (pos+5) = 'u' && String.unsafe_get s (pos+6) = 'n' && String.unsafe_get s (pos+7) = '_' && String.unsafe_get s (pos+8) = 'i' && String.unsafe_get s (pos+9) = 'd' then ( + 4 + ) + else ( + -1 + ) + ) | 11 -> ( if String.unsafe_get s pos = 'c' && String.unsafe_get s (pos+1) = 'l' && String.unsafe_get s (pos+2) = 'i' && String.unsafe_get s (pos+3) = '_' && String.unsafe_get 
s (pos+4) = 'v' && String.unsafe_get s (pos+5) = 'e' && String.unsafe_get s (pos+6) = 'r' && String.unsafe_get s (pos+7) = 's' && String.unsafe_get s (pos+8) = 'i' && String.unsafe_get s (pos+9) = 'o' && String.unsafe_get s (pos+10) = 'n' then ( 0 @@ -11271,6 +11310,16 @@ let read_scan_metadata = ( ) p lb ); ) + | 4 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_scm_run_id := ( + Some ( + ( + Atdgen_runtime.Oj_run.read_string + ) p lb + ) + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -11284,6 +11333,7 @@ let read_scan_metadata = ( unique_id = (match !field_unique_id with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "unique_id"); requested_products = (match !field_requested_products with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "requested_products"); dry_run = !field_dry_run; + scm_run_id = !field_scm_run_id; } : scan_metadata) ) diff --git a/semgrep_output_v1_j.mli b/semgrep_output_v1_j.mli index e068f283..f6ee9f1c 100644 --- a/semgrep_output_v1_j.mli +++ b/semgrep_output_v1_j.mli @@ -302,7 +302,8 @@ type scan_metadata = Semgrep_output_v1_t.scan_metadata = { cli_version: version; unique_id: uuid; requested_products: product list; - dry_run: bool + dry_run: bool; + scm_run_id: string option } type project_metadata = Semgrep_output_v1_t.project_metadata = {