From e57469291ef2153eabd1c19fb8c6d766a7675144 Mon Sep 17 00:00:00 2001 From: pad Date: Tue, 26 Nov 2024 19:12:46 +0100 Subject: [PATCH 1/5] Make project_metadata mandatory in scan_request Step1 towards getting rid of meta. test plan: see related PR in semgrep-pro --- semgrep_output_v1.atd | 6 +- semgrep_output_v1.jsonschema | 4 +- semgrep_output_v1.proto | 2 +- semgrep_output_v1.py | 21 +-- semgrep_output_v1.ts | 18 +- semgrep_output_v1_j.ml | 339 ++++++++--------------------------- semgrep_output_v1_j.mli | 6 +- 7 files changed, 103 insertions(+), 293 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index 26365abc..9674a772 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -1444,9 +1444,9 @@ type scan_request = { (* until 1.43ish, was 'meta: project_metadata;' before *) meta: raw_json; (* from 1.43 *) - ?project_metadata: project_metadata option; (* replacing meta *) - ?project_config: ci_config_from_repo option; - ?scan_metadata: scan_metadata option; + project_metadata: project_metadata; (* replacing meta *) + project_config: ci_config_from_repo; + scan_metadata: scan_metadata; } (* Response from the backend to the CLI to the POST /api/cli/scans *) diff --git a/semgrep_output_v1.jsonschema b/semgrep_output_v1.jsonschema index 8add2576..949ef085 100644 --- a/semgrep_output_v1.jsonschema +++ b/semgrep_output_v1.jsonschema @@ -1125,7 +1125,9 @@ }, "scan_request": { "type": "object", - "required": [ "meta" ], + "required": [ + "meta", "project_metadata", "project_config", "scan_metadata" + ], "properties": { "meta": { "$ref": "#/definitions/raw_json" }, "project_metadata": { "$ref": "#/definitions/project_metadata" }, diff --git a/semgrep_output_v1.proto b/semgrep_output_v1.proto index e3aae7b2..4e77a20a 100644 --- a/semgrep_output_v1.proto +++ b/semgrep_output_v1.proto @@ -1,6 +1,6 @@ // Generated by jsonschema2protobuf. DO NOT EDIT! // Source file: semgrep_output_v1.jsonschema -// Source file sha256 digest: 1e60b2e61e38c4ee07ad36c0b7baaaebc23a95923552937fec5bcd70d0a4105d +// Source file sha256 digest: cd40dce0f95ac41eac0c823a77f7cec869dd8f230915dc54324b3457a5692f57 syntax = "proto3"; diff --git a/semgrep_output_v1.py b/semgrep_output_v1.py index 8b52323f..eb84e3f2 100644 --- a/semgrep_output_v1.py +++ b/semgrep_output_v1.py @@ -4725,18 +4725,18 @@ class ScanRequest: """Original type: scan_request = { ... }""" meta: RawJson - project_metadata: Optional[ProjectMetadata] = None - project_config: Optional[CiConfigFromRepo] = None - scan_metadata: Optional[ScanMetadata] = None + project_metadata: ProjectMetadata + project_config: CiConfigFromRepo + scan_metadata: ScanMetadata @classmethod def from_json(cls, x: Any) -> 'ScanRequest': if isinstance(x, dict): return cls( meta=RawJson.from_json(x['meta']) if 'meta' in x else _atd_missing_json_field('ScanRequest', 'meta'), - project_metadata=ProjectMetadata.from_json(x['project_metadata']) if 'project_metadata' in x else None, - project_config=CiConfigFromRepo.from_json(x['project_config']) if 'project_config' in x else None, - scan_metadata=ScanMetadata.from_json(x['scan_metadata']) if 'scan_metadata' in x else None, + project_metadata=ProjectMetadata.from_json(x['project_metadata']) if 'project_metadata' in x else _atd_missing_json_field('ScanRequest', 'project_metadata'), + project_config=CiConfigFromRepo.from_json(x['project_config']) if 'project_config' in x else _atd_missing_json_field('ScanRequest', 'project_config'), + scan_metadata=ScanMetadata.from_json(x['scan_metadata']) if 'scan_metadata' in x else _atd_missing_json_field('ScanRequest', 'scan_metadata'), ) else: _atd_bad_json('ScanRequest', x) @@ -4744,12 +4744,9 @@ def from_json(cls, x: Any) -> 'ScanRequest': def to_json(self) -> Any: res: Dict[str, Any] = {} res['meta'] = (lambda x: x.to_json())(self.meta) - if self.project_metadata is not None: - res['project_metadata'] = (lambda x: x.to_json())(self.project_metadata) - if self.project_config is not None: - res['project_config'] = (lambda x: x.to_json())(self.project_config) - if self.scan_metadata is not None: - res['scan_metadata'] = (lambda x: x.to_json())(self.scan_metadata) + res['project_metadata'] = (lambda x: x.to_json())(self.project_metadata) + res['project_config'] = (lambda x: x.to_json())(self.project_config) + res['scan_metadata'] = (lambda x: x.to_json())(self.scan_metadata) return res @classmethod diff --git a/semgrep_output_v1.ts b/semgrep_output_v1.ts index d92e82f0..2a41ae90 100644 --- a/semgrep_output_v1.ts +++ b/semgrep_output_v1.ts @@ -634,9 +634,9 @@ export type ScanMetadata = { export type ScanRequest = { meta: RawJson; - project_metadata?: ProjectMetadata; - project_config?: CiConfigFromRepo; - scan_metadata?: ScanMetadata; + project_metadata: ProjectMetadata; + project_config: CiConfigFromRepo; + scan_metadata: ScanMetadata; } export type ScanResponse = { @@ -2950,18 +2950,18 @@ export function readScanMetadata(x: any, context: any = x): ScanMetadata { export function writeScanRequest(x: ScanRequest, context: any = x): any { return { 'meta': _atd_write_required_field('ScanRequest', 'meta', writeRawJson, x.meta, x), - 'project_metadata': _atd_write_optional_field(writeProjectMetadata, x.project_metadata, x), - 'project_config': _atd_write_optional_field(writeCiConfigFromRepo, x.project_config, x), - 'scan_metadata': _atd_write_optional_field(writeScanMetadata, x.scan_metadata, x), + 'project_metadata': _atd_write_required_field('ScanRequest', 'project_metadata', writeProjectMetadata, x.project_metadata, x), + 'project_config': _atd_write_required_field('ScanRequest', 'project_config', writeCiConfigFromRepo, x.project_config, x), + 'scan_metadata': _atd_write_required_field('ScanRequest', 'scan_metadata', writeScanMetadata, x.scan_metadata, x), }; } export function readScanRequest(x: any, context: any = x): ScanRequest { return { meta: _atd_read_required_field('ScanRequest', 'meta', readRawJson, x['meta'], x), - project_metadata: _atd_read_optional_field(readProjectMetadata, x['project_metadata'], x), - project_config: _atd_read_optional_field(readCiConfigFromRepo, x['project_config'], x), - scan_metadata: _atd_read_optional_field(readScanMetadata, x['scan_metadata'], x), + project_metadata: _atd_read_required_field('ScanRequest', 'project_metadata', readProjectMetadata, x['project_metadata'], x), + project_config: _atd_read_required_field('ScanRequest', 'project_config', readCiConfigFromRepo, x['project_config'], x), + scan_metadata: _atd_read_required_field('ScanRequest', 'scan_metadata', readScanMetadata, x['scan_metadata'], x), }; } diff --git a/semgrep_output_v1_j.ml b/semgrep_output_v1_j.ml index 149b8aad..f1e450d5 100644 --- a/semgrep_output_v1_j.ml +++ b/semgrep_output_v1_j.ml @@ -386,9 +386,9 @@ type ci_config_from_repo = Semgrep_output_v1_t.ci_config_from_repo = { type scan_request = Semgrep_output_v1_t.scan_request = { meta: raw_json; - project_metadata: project_metadata option; - project_config: ci_config_from_repo option; - scan_metadata: scan_metadata option + project_metadata: project_metadata; + project_config: ci_config_from_repo; + scan_metadata: scan_metadata } type ci_env = Semgrep_output_v1_t.ci_env @@ -14501,177 +14501,6 @@ let read_ci_config_from_repo = ( ) let ci_config_from_repo_of_string s = read_ci_config_from_repo (Yojson.Safe.init_lexer ()) (Lexing.from_string s) -let write__scan_metadata_option = ( - Atdgen_runtime.Oj_run.write_std_option ( - write_scan_metadata - ) -) -let string_of__scan_metadata_option ?(len = 1024) x = - let ob = Buffer.create len in - write__scan_metadata_option ob x; - Buffer.contents ob -let read__scan_metadata_option = ( - fun p lb -> - Yojson.Safe.read_space p lb; - match Yojson.Safe.start_any_variant p lb with - | `Edgy_bracket -> ( - match Yojson.Safe.read_ident p lb with - | "None" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (None : _ option) - | "Some" -> - Atdgen_runtime.Oj_run.read_until_field_value p lb; - let x = ( - read_scan_metadata - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Double_quote -> ( - match Yojson.Safe.finish_string p lb with - | "None" -> - (None : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Square_bracket -> ( - match Atdgen_runtime.Oj_run.read_string p lb with - | "Some" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_comma p lb; - Yojson.Safe.read_space p lb; - let x = ( - read_scan_metadata - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_rbr p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) -) -let _scan_metadata_option_of_string s = - read__scan_metadata_option (Yojson.Safe.init_lexer ()) (Lexing.from_string s) -let write__project_metadata_option = ( - Atdgen_runtime.Oj_run.write_std_option ( - write_project_metadata - ) -) -let string_of__project_metadata_option ?(len = 1024) x = - let ob = Buffer.create len in - write__project_metadata_option ob x; - Buffer.contents ob -let read__project_metadata_option = ( - fun p lb -> - Yojson.Safe.read_space p lb; - match Yojson.Safe.start_any_variant p lb with - | `Edgy_bracket -> ( - match Yojson.Safe.read_ident p lb with - | "None" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (None : _ option) - | "Some" -> - Atdgen_runtime.Oj_run.read_until_field_value p lb; - let x = ( - read_project_metadata - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Double_quote -> ( - match Yojson.Safe.finish_string p lb with - | "None" -> - (None : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Square_bracket -> ( - match Atdgen_runtime.Oj_run.read_string p lb with - | "Some" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_comma p lb; - Yojson.Safe.read_space p lb; - let x = ( - read_project_metadata - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_rbr p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) -) -let _project_metadata_option_of_string s = - read__project_metadata_option (Yojson.Safe.init_lexer ()) (Lexing.from_string s) -let write__ci_config_from_repo_option = ( - Atdgen_runtime.Oj_run.write_std_option ( - write_ci_config_from_repo - ) -) -let string_of__ci_config_from_repo_option ?(len = 1024) x = - let ob = Buffer.create len in - write__ci_config_from_repo_option ob x; - Buffer.contents ob -let read__ci_config_from_repo_option = ( - fun p lb -> - Yojson.Safe.read_space p lb; - match Yojson.Safe.start_any_variant p lb with - | `Edgy_bracket -> ( - match Yojson.Safe.read_ident p lb with - | "None" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (None : _ option) - | "Some" -> - Atdgen_runtime.Oj_run.read_until_field_value p lb; - let x = ( - read_ci_config_from_repo - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_gt p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Double_quote -> ( - match Yojson.Safe.finish_string p lb with - | "None" -> - (None : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) - | `Square_bracket -> ( - match Atdgen_runtime.Oj_run.read_string p lb with - | "Some" -> - Yojson.Safe.read_space p lb; - Yojson.Safe.read_comma p lb; - Yojson.Safe.read_space p lb; - let x = ( - read_ci_config_from_repo - ) p lb - in - Yojson.Safe.read_space p lb; - Yojson.Safe.read_rbr p lb; - (Some x : _ option) - | x -> - Atdgen_runtime.Oj_run.invalid_variant_tag p x - ) -) -let _ci_config_from_repo_option_of_string s = - read__ci_config_from_repo_option (Yojson.Safe.init_lexer ()) (Lexing.from_string s) let write_scan_request : _ -> scan_request -> _ = ( fun ob (x : scan_request) -> Buffer.add_char ob '{'; @@ -14685,39 +14514,33 @@ let write_scan_request : _ -> scan_request -> _ = ( write_raw_json ) ob x.meta; - (match x.project_metadata with None -> () | Some x -> - if !is_first then - is_first := false - else - Buffer.add_char ob ','; - Buffer.add_string ob "\"project_metadata\":"; - ( - write_project_metadata - ) - ob x; - ); - (match x.project_config with None -> () | Some x -> - if !is_first then - is_first := false - else - Buffer.add_char ob ','; - Buffer.add_string ob "\"project_config\":"; - ( - write_ci_config_from_repo - ) - ob x; - ); - (match x.scan_metadata with None -> () | Some x -> - if !is_first then - is_first := false - else - Buffer.add_char ob ','; - Buffer.add_string ob "\"scan_metadata\":"; - ( - write_scan_metadata - ) - ob x; - ); + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"project_metadata\":"; + ( + write_project_metadata + ) + ob x.project_metadata; + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"project_config\":"; + ( + write_ci_config_from_repo + ) + ob x.project_config; + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"scan_metadata\":"; + ( + write_scan_metadata + ) + ob x.scan_metadata; Buffer.add_char ob '}'; ) let string_of_scan_request ?(len = 1024) x = @@ -14790,35 +14613,29 @@ let read_scan_request = ( ) ); | 1 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_project_metadata := ( - Some ( - ( - read_project_metadata - ) p lb - ) - ); - ) + field_project_metadata := ( + Some ( + ( + read_project_metadata + ) p lb + ) + ); | 2 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_project_config := ( - Some ( - ( - read_ci_config_from_repo - ) p lb - ) - ); - ) + field_project_config := ( + Some ( + ( + read_ci_config_from_repo + ) p lb + ) + ); | 3 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_scan_metadata := ( - Some ( - ( - read_scan_metadata - ) p lb - ) - ); - ) + field_scan_metadata := ( + Some ( + ( + read_scan_metadata + ) p lb + ) + ); | _ -> ( Yojson.Safe.skip_json p lb ) @@ -14881,35 +14698,29 @@ let read_scan_request = ( ) ); | 1 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_project_metadata := ( - Some ( - ( - read_project_metadata - ) p lb - ) - ); - ) + field_project_metadata := ( + Some ( + ( + read_project_metadata + ) p lb + ) + ); | 2 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_project_config := ( - Some ( - ( - read_ci_config_from_repo - ) p lb - ) - ); - ) + field_project_config := ( + Some ( + ( + read_ci_config_from_repo + ) p lb + ) + ); | 3 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_scan_metadata := ( - Some ( - ( - read_scan_metadata - ) p lb - ) - ); - ) + field_scan_metadata := ( + Some ( + ( + read_scan_metadata + ) p lb + ) + ); | _ -> ( Yojson.Safe.skip_json p lb ) @@ -14920,9 +14731,9 @@ let read_scan_request = ( ( { meta = (match !field_meta with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "meta"); - project_metadata = !field_project_metadata; - project_config = !field_project_config; - scan_metadata = !field_scan_metadata; + project_metadata = (match !field_project_metadata with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "project_metadata"); + project_config = (match !field_project_config with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "project_config"); + scan_metadata = (match !field_scan_metadata with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "scan_metadata"); } : scan_request) ) diff --git a/semgrep_output_v1_j.mli b/semgrep_output_v1_j.mli index 7effeeb1..a11a9e54 100644 --- a/semgrep_output_v1_j.mli +++ b/semgrep_output_v1_j.mli @@ -386,9 +386,9 @@ type ci_config_from_repo = Semgrep_output_v1_t.ci_config_from_repo = { type scan_request = Semgrep_output_v1_t.scan_request = { meta: raw_json; - project_metadata: project_metadata option; - project_config: ci_config_from_repo option; - scan_metadata: scan_metadata option + project_metadata: project_metadata; + project_config: ci_config_from_repo; + scan_metadata: scan_metadata } type ci_env = Semgrep_output_v1_t.ci_env From d017faf59c1a714a98a236d00ffcf2a26c8e344b Mon Sep 17 00:00:00 2001 From: pad Date: Wed, 27 Nov 2024 08:44:54 +0100 Subject: [PATCH 2/5] more --- semgrep_output_v1.atd | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index 9674a772..67d82544 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -7,7 +7,7 @@ * This file specifies the JSON formats of: * - the output of the 'semgrep scan --json' command * - the output of the 'semgrep test --json' command - * - the messages sent (and received) to the Semgrep backend by the + * - the messages exchanged with the Semgrep backend by the * 'semgrep ci' command * * There are other important form of outputs which are not specified here: @@ -77,7 +77,10 @@ type raw_json = abstract * less: could convert directly to Path class of pathlib library for Python * See libs/commons/ATD_string_wrap.ml for more info on those ATD_string_wrap. *) -type fpath = string wrap +type fpath + + = + string wrap type uri = string wrap @@ -104,10 +107,8 @@ type position col: int; (* starts from 1 *) (* Byte position from the beginning of the file, starts at 0. * OCaml code sets it correctly. Python code sets it to a dummy value (-1). - * This uses '~' because semgrep < 1.30? was *producing* positions without - * offset sometimes, and we want the backend to still *consume* such positions. *) - ~offset: int; + offset: int; } (* a.k.a range *) @@ -553,7 +554,7 @@ type core_error = { ?details: string option; ?location: location option; ?rule_id: rule_id option; - } +} (* ----------------------------- *) (* CLI *) @@ -869,8 +870,8 @@ type cli_output_extra = { *) ?explanations: matching_explanation list option; - (* These rules, classified by engine used, will let us be transparent in the CLI - * output over what rules were run with what. + (* These rules, classified by engine used, will let us be transparent in + * the CLI output over what rules were run with what. * EXPERIMENTAL: since: 1.11.0 *) ?rules_by_engine: rule_id_and_engine_kind list option; @@ -1441,12 +1442,14 @@ type scan_metadata = { (* Sent by the CLI to the POST /api/cli/scans to create a scan. *) type scan_request = { - (* until 1.43ish, was 'meta: project_metadata;' before *) - meta: raw_json; - (* from 1.43 *) - project_metadata: project_metadata; (* replacing meta *) + (* added in 1.43 as options, and made mandatory since 1.98.0 *) + project_metadata: project_metadata; (* replacing meta below *) project_config: ci_config_from_repo; scan_metadata: scan_metadata; + (* deprecated: moved as an option in 1.98.0 and was used until 1.43ish + * old: 'meta: project_metadata;' before 1.43 + *) + ?meta: raw_json option; } (* Response from the backend to the CLI to the POST /api/cli/scans *) From b8cfc55a2a35a2269ee62f5df345f2f6741494b0 Mon Sep 17 00:00:00 2001 From: pad Date: Wed, 27 Nov 2024 08:50:54 +0100 Subject: [PATCH 3/5] update template --- .github/pull_request_template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 01ae6ae0..efb7d40b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,5 @@ - [ ] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [ ] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend need to still be able to *consume* data generated - by Semgrep 1.17.0. + by Semgrep 1.50.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades From d8b941956eef86ed43e0e1d50b5fe56d318837a5 Mon Sep 17 00:00:00 2001 From: pad Date: Thu, 28 Nov 2024 09:42:59 +0100 Subject: [PATCH 4/5] put back project_config as an option and few cleanups --- semgrep_output_v1.atd | 121 ++++++++++--------- semgrep_output_v1.jsonschema | 82 +++++++------ semgrep_output_v1.proto | 16 +-- semgrep_output_v1.py | 18 +-- semgrep_output_v1.ts | 190 +++++++++++++++--------------- semgrep_output_v1_j.ml | 219 +++++++++++++++++++++++------------ semgrep_output_v1_j.mli | 6 +- 7 files changed, 366 insertions(+), 286 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index 67d82544..d74a99e2 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -144,8 +144,8 @@ type rule_id coupling: with 'severity' in 'rule_schema_v2.atd' *) type match_severity - - = [ + + = [ | Error | Warning | Experiment @@ -174,8 +174,8 @@ type match_severity alt: could reuse match_severity but seems cleaner to define its own type *) type error_severity - - = [ + + = [ | Error | Warning | Info @@ -188,11 +188,11 @@ type error_severity Interfile_taint = requires interfile taint Other_pro_feature = requires some non-taint pro feature *) type pro_feature - - = { - interproc_taint: bool; - interfile_taint: bool; - proprietary_language: bool; + + = { + interproc_taint: bool; + interfile_taint: bool; + proprietary_language: bool; } (* Report the engine used to detect each finding. Additionally, if we are able @@ -209,8 +209,8 @@ type pro_feature we're leaving them as is *) type engine_of_finding - - = [ + + = [ | OSS | PRO (* Semgrep 1.64.0 or later *) @@ -218,8 +218,8 @@ type engine_of_finding ] type engine_kind - - = [ + + = [ | OSS | PRO ] @@ -467,8 +467,8 @@ type match_intermediate_var = { * Error.string_of_error_type() for osemgrep. *) type error_type - - = [ + + = [ (* File parsing related errors; coupling: if you add a target parse error then metrics for cli need to be updated. See cli/src/semgrep/parsing_data.py. @@ -1061,6 +1061,7 @@ type todo = int (* This is also known as Software Composition Analysis (SCA) *) (* EXPERIMENTAL *) +(* part of cli_match_extra *) type sca_info = { reachable: bool; reachability_rule: bool; @@ -1074,11 +1075,19 @@ type dependency_match = { lockfile: string; } +type dependency_pattern = { + ecosystem: ecosystem; + package: string; + semver_range: string; +} + (* both ecosystem and transitivity below have frozen=True so the generated * classes can be hashed and put in sets (see calls to reachable_deps.add() * in semgrep SCA code) *) -type ecosystem = [ +type ecosystem + + = [ | Npm | Pypi | Gem @@ -1094,23 +1103,6 @@ type ecosystem ] -type transitivity = [ - | Direct - | Transitive - | Unknown -] - -type dependency_pattern = { - ecosystem: ecosystem; - package: string; - semver_range: string; -} - -type dependency_child = { - package: string; - version: string; -} - type found_dependency = { package: string; version: string; @@ -1147,7 +1139,35 @@ type found_dependency = { ?git_ref: string option; } -(* json names are to maintain backwards compatibility with the python enum it is replacing *) +type transitivity + + = [ + | Direct + | Transitive + | Unknown +] + + +type dependency_child = { + package: string; + version: string; +} + +(* Used in ci_scan_complete *) +type dependency_parser_error = { + path: string; + parser: sca_parser_name; + reason: string; + (* Not using `position` because this type must be backwards compatible with the python + * class it is replacing. + *) + ?line: int option; + ?col: int option; + ?text: string option; +} + +(* json names are to maintain backwards compatibility with the python enum it is + * replacing *) type sca_parser_name = [ | Gemfile_lock | Go_mod @@ -1171,17 +1191,6 @@ type sca_parser_name = [ | Mix_lock ] -type dependency_parser_error = { - path: string; - parser: sca_parser_name; - reason: string; - (* Not using `position` because this type must be backwards compatible with the python - * class it is replacing. - *) - ?line: int option; - ?col: int option; - ?text: string option; -} (*****************************************************************************) (* Semgrep Secrets *) @@ -1205,8 +1214,8 @@ type historical_info = { (*****************************************************************************) (* EXPERIMENTAL: do not rely on the types in this section; those are internal - * types used to communicate with the Semgrep backend and are not meant - * to be consumed directly by Semgrep users or tools wrapping up Semgrep. + * types used to communicate with the Semgrep App backend and are not meant + * to be consumed directly by Semgrep users or tools wrapping Semgrep. * * The sequence of HTTP requests is mostly: * - /deployments/current with token @@ -1294,7 +1303,7 @@ type deployment_config = { inherit has_features; } -(* Content of the .semgrepconfig.yml in the repository. +(* Content of a possible .semgrepconfig.yml in the repository. * * This config allows to configure Semgrep per repo, e.g., to store * a category/tag like "webapp" in a repo so that the Semgrep WebApp can @@ -1302,11 +1311,11 @@ type deployment_config = { * later when given this ci_config_from_repo in the scan_request. *) type ci_config_from_repo = { - (* version of the .semgrepconfig.yml format. "V1" right now (useful?) *) + (* version of the .semgrepconfig.yml format. "v1" right now (useful?) *) ~version : version; ?tags: tag list option; } -(* ?? ex? *) +(* ex: "webapp" *) type tag = string (* Response by the backend to the CLI to the POST /scans//config *) @@ -1320,7 +1329,7 @@ type scan_config = { policy_names: string list; (* rules raw content in JSON format (but still sent as a string) *) rule_config: string; - (* since 1.47.0 *) + (* since 1.47.0 but not created by the backend (nor used by the CLI) *) ?ci_config_from_cloud: ci_config_from_cloud option; (* Deprecated: should rely on ci_config_from_cloud instead *) @@ -1442,10 +1451,11 @@ type scan_metadata = { (* Sent by the CLI to the POST /api/cli/scans to create a scan. *) type scan_request = { - (* added in 1.43 as options, and made mandatory since 1.98.0 *) - project_metadata: project_metadata; (* replacing meta below *) - project_config: ci_config_from_repo; + (* added in 1.43 as options, and mandatory since 1.98.0 (replacing meta) *) + project_metadata: project_metadata; scan_metadata: scan_metadata; + (* added in 1.43 (used to be in meta) *) + ?project_config: ci_config_from_repo option; (* deprecated: moved as an option in 1.98.0 and was used until 1.43ish * old: 'meta: project_metadata;' before 1.43 *) @@ -1650,7 +1660,8 @@ type ci_scan_results_response_error = { type ci_scan_complete = { exit_code: int; stats: ci_scan_complete_stats; - ?dependencies: ci_scan_dependencies option; (* remove when min version is 1.38.0 *) + (* TODO: remove dependencies when min version is 1.38.0 *) + ?dependencies: ci_scan_dependencies option; ?dependency_parser_errors: dependency_parser_error list option; (* since 1.31.0 *) ?task_id: string option; diff --git a/semgrep_output_v1.jsonschema b/semgrep_output_v1.jsonschema index 949ef085..f3d1badf 100644 --- a/semgrep_output_v1.jsonschema +++ b/semgrep_output_v1.jsonschema @@ -44,7 +44,7 @@ "version": { "type": "string" }, "position": { "type": "object", - "required": [ "line", "col" ], + "required": [ "line", "col", "offset" ], "properties": { "line": { "type": "integer" }, "col": { "type": "integer" }, @@ -830,6 +830,15 @@ "lockfile": { "type": "string" } } }, + "dependency_pattern": { + "type": "object", + "required": [ "ecosystem", "package", "semver_range" ], + "properties": { + "ecosystem": { "$ref": "#/definitions/ecosystem" }, + "package": { "type": "string" }, + "semver_range": { "type": "string" } + } + }, "ecosystem": { "oneOf": [ { "const": "npm" }, @@ -846,30 +855,6 @@ { "const": "hex" } ] }, - "transitivity": { - "oneOf": [ - { "const": "direct" }, - { "const": "transitive" }, - { "const": "unknown" } - ] - }, - "dependency_pattern": { - "type": "object", - "required": [ "ecosystem", "package", "semver_range" ], - "properties": { - "ecosystem": { "$ref": "#/definitions/ecosystem" }, - "package": { "type": "string" }, - "semver_range": { "type": "string" } - } - }, - "dependency_child": { - "type": "object", - "required": [ "package", "version" ], - "properties": { - "package": { "type": "string" }, - "version": { "type": "string" } - } - }, "found_dependency": { "type": "object", "required": [ @@ -898,6 +883,33 @@ "git_ref": { "type": "string" } } }, + "transitivity": { + "oneOf": [ + { "const": "direct" }, + { "const": "transitive" }, + { "const": "unknown" } + ] + }, + "dependency_child": { + "type": "object", + "required": [ "package", "version" ], + "properties": { + "package": { "type": "string" }, + "version": { "type": "string" } + } + }, + "dependency_parser_error": { + "type": "object", + "required": [ "path", "parser", "reason" ], + "properties": { + "path": { "type": "string" }, + "parser": { "$ref": "#/definitions/sca_parser_name" }, + "reason": { "type": "string" }, + "line": { "type": "integer" }, + "col": { "type": "integer" }, + "text": { "type": "string" } + } + }, "sca_parser_name": { "oneOf": [ { "const": "gemfile_lock" }, @@ -922,18 +934,6 @@ { "const": "mix_lock" } ] }, - "dependency_parser_error": { - "type": "object", - "required": [ "path", "parser", "reason" ], - "properties": { - "path": { "type": "string" }, - "parser": { "$ref": "#/definitions/sca_parser_name" }, - "reason": { "type": "string" }, - "line": { "type": "integer" }, - "col": { "type": "integer" }, - "text": { "type": "string" } - } - }, "historical_info": { "type": "object", "required": [ "git_commit", "git_commit_timestamp" ], @@ -1125,14 +1125,12 @@ }, "scan_request": { "type": "object", - "required": [ - "meta", "project_metadata", "project_config", "scan_metadata" - ], + "required": [ "project_metadata", "scan_metadata" ], "properties": { - "meta": { "$ref": "#/definitions/raw_json" }, "project_metadata": { "$ref": "#/definitions/project_metadata" }, + "scan_metadata": { "$ref": "#/definitions/scan_metadata" }, "project_config": { "$ref": "#/definitions/ci_config_from_repo" }, - "scan_metadata": { "$ref": "#/definitions/scan_metadata" } + "meta": { "$ref": "#/definitions/raw_json" } } }, "scan_response": { diff --git a/semgrep_output_v1.proto b/semgrep_output_v1.proto index 4e77a20a..287cdf82 100644 --- a/semgrep_output_v1.proto +++ b/semgrep_output_v1.proto @@ -1,6 +1,6 @@ // Generated by jsonschema2protobuf. DO NOT EDIT! // Source file: semgrep_output_v1.jsonschema -// Source file sha256 digest: cd40dce0f95ac41eac0c823a77f7cec869dd8f230915dc54324b3457a5692f57 +// Source file sha256 digest: 0c194c187f09f172cfec5d6fe181725ee6e5c4cd5463f91a6d0843bc61e61149 syntax = "proto3"; @@ -303,11 +303,6 @@ message DependencyPattern { string semver_range = 154778121; } -message DependencyChild { - string package = 337745074; - string version = 508888787; -} - message FoundDependency { string package = 337745074; string version = 508888787; @@ -322,6 +317,11 @@ message FoundDependency { string git_ref = 109607572; } +message DependencyChild { + string package = 337745074; + string version = 508888787; +} + message DependencyParserError { string path = 3212859; google.protobuf.Any parser = 139312007; @@ -436,10 +436,10 @@ message ScanMetadata { } message ScanRequest { - google.protobuf.Any meta = 3004443; ProjectMetadata project_metadata = 24255885; - CiConfigFromRepo project_config = 317758767; ScanMetadata scan_metadata = 76122041; + CiConfigFromRepo project_config = 317758767; + google.protobuf.Any meta = 3004443; } message ScanResponse { diff --git a/semgrep_output_v1.py b/semgrep_output_v1.py index eb84e3f2..afbcb68d 100644 --- a/semgrep_output_v1.py +++ b/semgrep_output_v1.py @@ -787,7 +787,7 @@ class Position: line: int col: int - offset: int = field(default_factory=lambda: 0) + offset: int @classmethod def from_json(cls, x: Any) -> 'Position': @@ -795,7 +795,7 @@ def from_json(cls, x: Any) -> 'Position': return cls( line=_atd_read_int(x['line']) if 'line' in x else _atd_missing_json_field('Position', 'line'), col=_atd_read_int(x['col']) if 'col' in x else _atd_missing_json_field('Position', 'col'), - offset=_atd_read_int(x['offset']) if 'offset' in x else 0, + offset=_atd_read_int(x['offset']) if 'offset' in x else _atd_missing_json_field('Position', 'offset'), ) else: _atd_bad_json('Position', x) @@ -4724,29 +4724,31 @@ def to_json_string(self, **kw: Any) -> str: class ScanRequest: """Original type: scan_request = { ... }""" - meta: RawJson project_metadata: ProjectMetadata - project_config: CiConfigFromRepo scan_metadata: ScanMetadata + project_config: Optional[CiConfigFromRepo] = None + meta: Optional[RawJson] = None @classmethod def from_json(cls, x: Any) -> 'ScanRequest': if isinstance(x, dict): return cls( - meta=RawJson.from_json(x['meta']) if 'meta' in x else _atd_missing_json_field('ScanRequest', 'meta'), project_metadata=ProjectMetadata.from_json(x['project_metadata']) if 'project_metadata' in x else _atd_missing_json_field('ScanRequest', 'project_metadata'), - project_config=CiConfigFromRepo.from_json(x['project_config']) if 'project_config' in x else _atd_missing_json_field('ScanRequest', 'project_config'), scan_metadata=ScanMetadata.from_json(x['scan_metadata']) if 'scan_metadata' in x else _atd_missing_json_field('ScanRequest', 'scan_metadata'), + project_config=CiConfigFromRepo.from_json(x['project_config']) if 'project_config' in x else None, + meta=RawJson.from_json(x['meta']) if 'meta' in x else None, ) else: _atd_bad_json('ScanRequest', x) def to_json(self) -> Any: res: Dict[str, Any] = {} - res['meta'] = (lambda x: x.to_json())(self.meta) res['project_metadata'] = (lambda x: x.to_json())(self.project_metadata) - res['project_config'] = (lambda x: x.to_json())(self.project_config) res['scan_metadata'] = (lambda x: x.to_json())(self.scan_metadata) + if self.project_config is not None: + res['project_config'] = (lambda x: x.to_json())(self.project_config) + if self.meta is not None: + res['meta'] = (lambda x: x.to_json())(self.meta) return res @classmethod diff --git a/semgrep_output_v1.ts b/semgrep_output_v1.ts index 2a41ae90..8ab7381d 100644 --- a/semgrep_output_v1.ts +++ b/semgrep_output_v1.ts @@ -446,6 +446,12 @@ export type DependencyMatch = { lockfile: string; } +export type DependencyPattern = { + ecosystem: Ecosystem; + package_: string; + semver_range: string; +} + export type Ecosystem = | { kind: 'Npm' /* JSON: "npm" */ } | { kind: 'Pypi' /* JSON: "pypi" */ } @@ -460,22 +466,6 @@ export type Ecosystem = | { kind: 'Mix' /* JSON: "mix" */ } | { kind: 'Hex' /* JSON: "hex" */ } -export type Transitivity = -| { kind: 'Direct' /* JSON: "direct" */ } -| { kind: 'Transitive' /* JSON: "transitive" */ } -| { kind: 'Unknown' /* JSON: "unknown" */ } - -export type DependencyPattern = { - ecosystem: Ecosystem; - package_: string; - semver_range: string; -} - -export type DependencyChild = { - package_: string; - version: string; -} - export type FoundDependency = { package_: string; version: string; @@ -490,6 +480,25 @@ export type FoundDependency = { git_ref?: string; } +export type Transitivity = +| { kind: 'Direct' /* JSON: "direct" */ } +| { kind: 'Transitive' /* JSON: "transitive" */ } +| { kind: 'Unknown' /* JSON: "unknown" */ } + +export type DependencyChild = { + package_: string; + version: string; +} + +export type DependencyParserError = { + path: string; + parser: ScaParserName; + reason: string; + line?: number /*int*/; + col?: number /*int*/; + text?: string; +} + export type ScaParserName = | { kind: 'Gemfile_lock' /* JSON: "gemfile_lock" */ } | { kind: 'Go_mod' /* JSON: "go_mod" */ } @@ -512,15 +521,6 @@ export type ScaParserName = | { kind: 'Package_resolved' /* JSON: "package_resolved" */ } | { kind: 'Mix_lock' /* JSON: "mix_lock" */ } -export type DependencyParserError = { - path: string; - parser: ScaParserName; - reason: string; - line?: number /*int*/; - col?: number /*int*/; - text?: string; -} - export type HistoricalInfo = { git_commit: Sha1; git_blob?: Sha1; @@ -633,10 +633,10 @@ export type ScanMetadata = { } export type ScanRequest = { - meta: RawJson; project_metadata: ProjectMetadata; - project_config: CiConfigFromRepo; scan_metadata: ScanMetadata; + project_config?: CiConfigFromRepo; + meta?: RawJson; } export type ScanResponse = { @@ -1039,7 +1039,7 @@ export function writePosition(x: Position, context: any = x): any { return { 'line': _atd_write_required_field('Position', 'line', _atd_write_int, x.line, x), 'col': _atd_write_required_field('Position', 'col', _atd_write_int, x.col, x), - 'offset': _atd_write_field_with_default(_atd_write_int, 0, x.offset, x), + 'offset': _atd_write_required_field('Position', 'offset', _atd_write_int, x.offset, x), }; } @@ -1047,7 +1047,7 @@ export function readPosition(x: any, context: any = x): Position { return { line: _atd_read_required_field('Position', 'line', _atd_read_int, x['line'], x), col: _atd_read_required_field('Position', 'col', _atd_read_int, x['col'], x), - offset: _atd_read_field_with_default(_atd_read_int, 0, x['offset'], x), + offset: _atd_read_required_field('Position', 'offset', _atd_read_int, x['offset'], x), }; } @@ -2402,6 +2402,22 @@ export function readDependencyMatch(x: any, context: any = x): DependencyMatch { }; } +export function writeDependencyPattern(x: DependencyPattern, context: any = x): any { + return { + 'ecosystem': _atd_write_required_field('DependencyPattern', 'ecosystem', writeEcosystem, x.ecosystem, x), + 'package': _atd_write_required_field('DependencyPattern', 'package', _atd_write_string, x.package_, x), + 'semver_range': _atd_write_required_field('DependencyPattern', 'semver_range', _atd_write_string, x.semver_range, x), + }; +} + +export function readDependencyPattern(x: any, context: any = x): DependencyPattern { + return { + ecosystem: _atd_read_required_field('DependencyPattern', 'ecosystem', readEcosystem, x['ecosystem'], x), + package_: _atd_read_required_field('DependencyPattern', 'package', _atd_read_string, x['package'], x), + semver_range: _atd_read_required_field('DependencyPattern', 'semver_range', _atd_read_string, x['semver_range'], x), + }; +} + export function writeEcosystem(x: Ecosystem, context: any = x): any { switch (x.kind) { case 'Npm': @@ -2463,6 +2479,38 @@ export function readEcosystem(x: any, context: any = x): Ecosystem { } } +export function writeFoundDependency(x: FoundDependency, context: any = x): any { + return { + 'package': _atd_write_required_field('FoundDependency', 'package', _atd_write_string, x.package_, x), + 'version': _atd_write_required_field('FoundDependency', 'version', _atd_write_string, x.version, x), + 'ecosystem': _atd_write_required_field('FoundDependency', 'ecosystem', writeEcosystem, x.ecosystem, x), + 'allowed_hashes': _atd_write_required_field('FoundDependency', 'allowed_hashes', _atd_write_assoc_map_to_object(_atd_write_array(_atd_write_string)), x.allowed_hashes, x), + 'resolved_url': _atd_write_optional_field(_atd_write_string, x.resolved_url, x), + 'transitivity': _atd_write_required_field('FoundDependency', 'transitivity', writeTransitivity, x.transitivity, x), + 'manifest_path': _atd_write_optional_field(writeFpath, x.manifest_path, x), + 'lockfile_path': _atd_write_optional_field(writeFpath, x.lockfile_path, x), + 'line_number': _atd_write_optional_field(_atd_write_int, x.line_number, x), + 'children': _atd_write_optional_field(_atd_write_array(writeDependencyChild), x.children, x), + 'git_ref': _atd_write_optional_field(_atd_write_string, x.git_ref, x), + }; +} + +export function readFoundDependency(x: any, context: any = x): FoundDependency { + return { + package_: _atd_read_required_field('FoundDependency', 'package', _atd_read_string, x['package'], x), + version: _atd_read_required_field('FoundDependency', 'version', _atd_read_string, x['version'], x), + ecosystem: _atd_read_required_field('FoundDependency', 'ecosystem', readEcosystem, x['ecosystem'], x), + allowed_hashes: _atd_read_required_field('FoundDependency', 'allowed_hashes', _atd_read_assoc_object_into_map(_atd_read_array(_atd_read_string)), x['allowed_hashes'], x), + resolved_url: _atd_read_optional_field(_atd_read_string, x['resolved_url'], x), + transitivity: _atd_read_required_field('FoundDependency', 'transitivity', readTransitivity, x['transitivity'], x), + manifest_path: _atd_read_optional_field(readFpath, x['manifest_path'], x), + lockfile_path: _atd_read_optional_field(readFpath, x['lockfile_path'], x), + line_number: _atd_read_optional_field(_atd_read_int, x['line_number'], x), + children: _atd_read_optional_field(_atd_read_array(readDependencyChild), x['children'], x), + git_ref: _atd_read_optional_field(_atd_read_string, x['git_ref'], x), + }; +} + export function writeTransitivity(x: Transitivity, context: any = x): any { switch (x.kind) { case 'Direct': @@ -2488,22 +2536,6 @@ export function readTransitivity(x: any, context: any = x): Transitivity { } } -export function writeDependencyPattern(x: DependencyPattern, context: any = x): any { - return { - 'ecosystem': _atd_write_required_field('DependencyPattern', 'ecosystem', writeEcosystem, x.ecosystem, x), - 'package': _atd_write_required_field('DependencyPattern', 'package', _atd_write_string, x.package_, x), - 'semver_range': _atd_write_required_field('DependencyPattern', 'semver_range', _atd_write_string, x.semver_range, x), - }; -} - -export function readDependencyPattern(x: any, context: any = x): DependencyPattern { - return { - ecosystem: _atd_read_required_field('DependencyPattern', 'ecosystem', readEcosystem, x['ecosystem'], x), - package_: _atd_read_required_field('DependencyPattern', 'package', _atd_read_string, x['package'], x), - semver_range: _atd_read_required_field('DependencyPattern', 'semver_range', _atd_read_string, x['semver_range'], x), - }; -} - export function writeDependencyChild(x: DependencyChild, context: any = x): any { return { 'package': _atd_write_required_field('DependencyChild', 'package', _atd_write_string, x.package_, x), @@ -2518,35 +2550,25 @@ export function readDependencyChild(x: any, context: any = x): DependencyChild { }; } -export function writeFoundDependency(x: FoundDependency, context: any = x): any { +export function writeDependencyParserError(x: DependencyParserError, context: any = x): any { return { - 'package': _atd_write_required_field('FoundDependency', 'package', _atd_write_string, x.package_, x), - 'version': _atd_write_required_field('FoundDependency', 'version', _atd_write_string, x.version, x), - 'ecosystem': _atd_write_required_field('FoundDependency', 'ecosystem', writeEcosystem, x.ecosystem, x), - 'allowed_hashes': _atd_write_required_field('FoundDependency', 'allowed_hashes', _atd_write_assoc_map_to_object(_atd_write_array(_atd_write_string)), x.allowed_hashes, x), - 'resolved_url': _atd_write_optional_field(_atd_write_string, x.resolved_url, x), - 'transitivity': _atd_write_required_field('FoundDependency', 'transitivity', writeTransitivity, x.transitivity, x), - 'manifest_path': _atd_write_optional_field(writeFpath, x.manifest_path, x), - 'lockfile_path': _atd_write_optional_field(writeFpath, x.lockfile_path, x), - 'line_number': _atd_write_optional_field(_atd_write_int, x.line_number, x), - 'children': _atd_write_optional_field(_atd_write_array(writeDependencyChild), x.children, x), - 'git_ref': _atd_write_optional_field(_atd_write_string, x.git_ref, x), + 'path': _atd_write_required_field('DependencyParserError', 'path', _atd_write_string, x.path, x), + 'parser': _atd_write_required_field('DependencyParserError', 'parser', writeScaParserName, x.parser, x), + 'reason': _atd_write_required_field('DependencyParserError', 'reason', _atd_write_string, x.reason, x), + 'line': _atd_write_optional_field(_atd_write_int, x.line, x), + 'col': _atd_write_optional_field(_atd_write_int, x.col, x), + 'text': _atd_write_optional_field(_atd_write_string, x.text, x), }; } -export function readFoundDependency(x: any, context: any = x): FoundDependency { +export function readDependencyParserError(x: any, context: any = x): DependencyParserError { return { - package_: _atd_read_required_field('FoundDependency', 'package', _atd_read_string, x['package'], x), - version: _atd_read_required_field('FoundDependency', 'version', _atd_read_string, x['version'], x), - ecosystem: _atd_read_required_field('FoundDependency', 'ecosystem', readEcosystem, x['ecosystem'], x), - allowed_hashes: _atd_read_required_field('FoundDependency', 'allowed_hashes', _atd_read_assoc_object_into_map(_atd_read_array(_atd_read_string)), x['allowed_hashes'], x), - resolved_url: _atd_read_optional_field(_atd_read_string, x['resolved_url'], x), - transitivity: _atd_read_required_field('FoundDependency', 'transitivity', readTransitivity, x['transitivity'], x), - manifest_path: _atd_read_optional_field(readFpath, x['manifest_path'], x), - lockfile_path: _atd_read_optional_field(readFpath, x['lockfile_path'], x), - line_number: _atd_read_optional_field(_atd_read_int, x['line_number'], x), - children: _atd_read_optional_field(_atd_read_array(readDependencyChild), x['children'], x), - git_ref: _atd_read_optional_field(_atd_read_string, x['git_ref'], x), + path: _atd_read_required_field('DependencyParserError', 'path', _atd_read_string, x['path'], x), + parser: _atd_read_required_field('DependencyParserError', 'parser', readScaParserName, x['parser'], x), + reason: _atd_read_required_field('DependencyParserError', 'reason', _atd_read_string, x['reason'], x), + line: _atd_read_optional_field(_atd_read_int, x['line'], x), + col: _atd_read_optional_field(_atd_read_int, x['col'], x), + text: _atd_read_optional_field(_atd_read_string, x['text'], x), }; } @@ -2643,28 +2665,6 @@ export function readScaParserName(x: any, context: any = x): ScaParserName { } } -export function writeDependencyParserError(x: DependencyParserError, context: any = x): any { - return { - 'path': _atd_write_required_field('DependencyParserError', 'path', _atd_write_string, x.path, x), - 'parser': _atd_write_required_field('DependencyParserError', 'parser', writeScaParserName, x.parser, x), - 'reason': _atd_write_required_field('DependencyParserError', 'reason', _atd_write_string, x.reason, x), - 'line': _atd_write_optional_field(_atd_write_int, x.line, x), - 'col': _atd_write_optional_field(_atd_write_int, x.col, x), - 'text': _atd_write_optional_field(_atd_write_string, x.text, x), - }; -} - -export function readDependencyParserError(x: any, context: any = x): DependencyParserError { - return { - path: _atd_read_required_field('DependencyParserError', 'path', _atd_read_string, x['path'], x), - parser: _atd_read_required_field('DependencyParserError', 'parser', readScaParserName, x['parser'], x), - reason: _atd_read_required_field('DependencyParserError', 'reason', _atd_read_string, x['reason'], x), - line: _atd_read_optional_field(_atd_read_int, x['line'], x), - col: _atd_read_optional_field(_atd_read_int, x['col'], x), - text: _atd_read_optional_field(_atd_read_string, x['text'], x), - }; -} - export function writeHistoricalInfo(x: HistoricalInfo, context: any = x): any { return { 'git_commit': _atd_write_required_field('HistoricalInfo', 'git_commit', writeSha1, x.git_commit, x), @@ -2949,19 +2949,19 @@ export function readScanMetadata(x: any, context: any = x): ScanMetadata { export function writeScanRequest(x: ScanRequest, context: any = x): any { return { - 'meta': _atd_write_required_field('ScanRequest', 'meta', writeRawJson, x.meta, x), 'project_metadata': _atd_write_required_field('ScanRequest', 'project_metadata', writeProjectMetadata, x.project_metadata, x), - 'project_config': _atd_write_required_field('ScanRequest', 'project_config', writeCiConfigFromRepo, x.project_config, x), 'scan_metadata': _atd_write_required_field('ScanRequest', 'scan_metadata', writeScanMetadata, x.scan_metadata, x), + 'project_config': _atd_write_optional_field(writeCiConfigFromRepo, x.project_config, x), + 'meta': _atd_write_optional_field(writeRawJson, x.meta, x), }; } export function readScanRequest(x: any, context: any = x): ScanRequest { return { - meta: _atd_read_required_field('ScanRequest', 'meta', readRawJson, x['meta'], x), project_metadata: _atd_read_required_field('ScanRequest', 'project_metadata', readProjectMetadata, x['project_metadata'], x), - project_config: _atd_read_required_field('ScanRequest', 'project_config', readCiConfigFromRepo, x['project_config'], x), scan_metadata: _atd_read_required_field('ScanRequest', 'scan_metadata', readScanMetadata, x['scan_metadata'], x), + project_config: _atd_read_optional_field(readCiConfigFromRepo, x['project_config'], x), + meta: _atd_read_optional_field(readRawJson, x['meta'], x), }; } diff --git a/semgrep_output_v1_j.ml b/semgrep_output_v1_j.ml index f1e450d5..b47851cd 100644 --- a/semgrep_output_v1_j.ml +++ b/semgrep_output_v1_j.ml @@ -385,10 +385,10 @@ type ci_config_from_repo = Semgrep_output_v1_t.ci_config_from_repo = { } type scan_request = Semgrep_output_v1_t.scan_request = { - meta: raw_json; project_metadata: project_metadata; - project_config: ci_config_from_repo; - scan_metadata: scan_metadata + scan_metadata: scan_metadata; + project_config: ci_config_from_repo option; + meta: raw_json option } type ci_env = Semgrep_output_v1_t.ci_env @@ -1369,7 +1369,7 @@ let read_position = ( Yojson.Safe.read_lcurl p lb; let field_line = ref (None) in let field_col = ref (None) in - let field_offset = ref (0) in + let field_offset = ref (None) in try Yojson.Safe.read_space p lb; Yojson.Safe.read_object_end lb; @@ -1428,13 +1428,13 @@ let read_position = ( ) ); | 2 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_offset := ( + field_offset := ( + Some ( ( Atdgen_runtime.Oj_run.read_int ) p lb - ); - ) + ) + ); | _ -> ( Yojson.Safe.skip_json p lb ) @@ -1497,13 +1497,13 @@ let read_position = ( ) ); | 2 -> - if not (Yojson.Safe.read_null_if_possible p lb) then ( - field_offset := ( + field_offset := ( + Some ( ( Atdgen_runtime.Oj_run.read_int ) p lb - ); - ) + ) + ); | _ -> ( Yojson.Safe.skip_json p lb ) @@ -1515,7 +1515,7 @@ let read_position = ( { line = (match !field_line with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "line"); col = (match !field_col with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "col"); - offset = !field_offset; + offset = (match !field_offset with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "offset"); } : position) ) @@ -14501,19 +14501,67 @@ let read_ci_config_from_repo = ( ) let ci_config_from_repo_of_string s = read_ci_config_from_repo (Yojson.Safe.init_lexer ()) (Lexing.from_string s) +let write__ci_config_from_repo_option = ( + Atdgen_runtime.Oj_run.write_std_option ( + write_ci_config_from_repo + ) +) +let string_of__ci_config_from_repo_option ?(len = 1024) x = + let ob = Buffer.create len in + write__ci_config_from_repo_option ob x; + Buffer.contents ob +let read__ci_config_from_repo_option = ( + fun p lb -> + Yojson.Safe.read_space p lb; + match Yojson.Safe.start_any_variant p lb with + | `Edgy_bracket -> ( + match Yojson.Safe.read_ident p lb with + | "None" -> + Yojson.Safe.read_space p lb; + Yojson.Safe.read_gt p lb; + (None : _ option) + | "Some" -> + Atdgen_runtime.Oj_run.read_until_field_value p lb; + let x = ( + read_ci_config_from_repo + ) p lb + in + Yojson.Safe.read_space p lb; + Yojson.Safe.read_gt p lb; + (Some x : _ option) + | x -> + Atdgen_runtime.Oj_run.invalid_variant_tag p x + ) + | `Double_quote -> ( + match Yojson.Safe.finish_string p lb with + | "None" -> + (None : _ option) + | x -> + Atdgen_runtime.Oj_run.invalid_variant_tag p x + ) + | `Square_bracket -> ( + match Atdgen_runtime.Oj_run.read_string p lb with + | "Some" -> + Yojson.Safe.read_space p lb; + Yojson.Safe.read_comma p lb; + Yojson.Safe.read_space p lb; + let x = ( + read_ci_config_from_repo + ) p lb + in + Yojson.Safe.read_space p lb; + Yojson.Safe.read_rbr p lb; + (Some x : _ option) + | x -> + Atdgen_runtime.Oj_run.invalid_variant_tag p x + ) +) +let _ci_config_from_repo_option_of_string s = + read__ci_config_from_repo_option (Yojson.Safe.init_lexer ()) (Lexing.from_string s) let write_scan_request : _ -> scan_request -> _ = ( fun ob (x : scan_request) -> Buffer.add_char ob '{'; let is_first = ref true in - if !is_first then - is_first := false - else - Buffer.add_char ob ','; - Buffer.add_string ob "\"meta\":"; - ( - write_raw_json - ) - ob x.meta; if !is_first then is_first := false else @@ -14525,15 +14573,6 @@ let write_scan_request : _ -> scan_request -> _ = ( ob x.project_metadata; if !is_first then is_first := false - else - Buffer.add_char ob ','; - Buffer.add_string ob "\"project_config\":"; - ( - write_ci_config_from_repo - ) - ob x.project_config; - if !is_first then - is_first := false else Buffer.add_char ob ','; Buffer.add_string ob "\"scan_metadata\":"; @@ -14541,6 +14580,28 @@ let write_scan_request : _ -> scan_request -> _ = ( write_scan_metadata ) ob x.scan_metadata; + (match x.project_config with None -> () | Some x -> + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"project_config\":"; + ( + write_ci_config_from_repo + ) + ob x; + ); + (match x.meta with None -> () | Some x -> + if !is_first then + is_first := false + else + Buffer.add_char ob ','; + Buffer.add_string ob "\"meta\":"; + ( + write_raw_json + ) + ob x; + ); Buffer.add_char ob '}'; ) let string_of_scan_request ?(len = 1024) x = @@ -14551,10 +14612,10 @@ let read_scan_request = ( fun p lb -> Yojson.Safe.read_space p lb; Yojson.Safe.read_lcurl p lb; - let field_meta = ref (None) in let field_project_metadata = ref (None) in - let field_project_config = ref (None) in let field_scan_metadata = ref (None) in + let field_project_config = ref (None) in + let field_meta = ref (None) in try Yojson.Safe.read_space p lb; Yojson.Safe.read_object_end lb; @@ -14566,7 +14627,7 @@ let read_scan_request = ( match len with | 4 -> ( if String.unsafe_get s pos = 'm' && String.unsafe_get s (pos+1) = 'e' && String.unsafe_get s (pos+2) = 't' && String.unsafe_get s (pos+3) = 'a' then ( - 0 + 3 ) else ( -1 @@ -14574,7 +14635,7 @@ let read_scan_request = ( ) | 13 -> ( if String.unsafe_get s pos = 's' && String.unsafe_get s (pos+1) = 'c' && String.unsafe_get s (pos+2) = 'a' && String.unsafe_get s (pos+3) = 'n' && String.unsafe_get s (pos+4) = '_' && String.unsafe_get s (pos+5) = 'm' && String.unsafe_get s (pos+6) = 'e' && String.unsafe_get s (pos+7) = 't' && String.unsafe_get s (pos+8) = 'a' && String.unsafe_get s (pos+9) = 'd' && String.unsafe_get s (pos+10) = 'a' && String.unsafe_get s (pos+11) = 't' && String.unsafe_get s (pos+12) = 'a' then ( - 3 + 1 ) else ( -1 @@ -14590,7 +14651,7 @@ let read_scan_request = ( ) | 16 -> ( if String.unsafe_get s pos = 'p' && String.unsafe_get s (pos+1) = 'r' && String.unsafe_get s (pos+2) = 'o' && String.unsafe_get s (pos+3) = 'j' && String.unsafe_get s (pos+4) = 'e' && String.unsafe_get s (pos+5) = 'c' && String.unsafe_get s (pos+6) = 't' && String.unsafe_get s (pos+7) = '_' && String.unsafe_get s (pos+8) = 'm' && String.unsafe_get s (pos+9) = 'e' && String.unsafe_get s (pos+10) = 't' && String.unsafe_get s (pos+11) = 'a' && String.unsafe_get s (pos+12) = 'd' && String.unsafe_get s (pos+13) = 'a' && String.unsafe_get s (pos+14) = 't' && String.unsafe_get s (pos+15) = 'a' then ( - 1 + 0 ) else ( -1 @@ -14605,14 +14666,6 @@ let read_scan_request = ( ( match i with | 0 -> - field_meta := ( - Some ( - ( - read_raw_json - ) p lb - ) - ); - | 1 -> field_project_metadata := ( Some ( ( @@ -14620,15 +14673,7 @@ let read_scan_request = ( ) p lb ) ); - | 2 -> - field_project_config := ( - Some ( - ( - read_ci_config_from_repo - ) p lb - ) - ); - | 3 -> + | 1 -> field_scan_metadata := ( Some ( ( @@ -14636,6 +14681,26 @@ let read_scan_request = ( ) p lb ) ); + | 2 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_project_config := ( + Some ( + ( + read_ci_config_from_repo + ) p lb + ) + ); + ) + | 3 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_meta := ( + Some ( + ( + read_raw_json + ) p lb + ) + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -14651,7 +14716,7 @@ let read_scan_request = ( match len with | 4 -> ( if String.unsafe_get s pos = 'm' && String.unsafe_get s (pos+1) = 'e' && String.unsafe_get s (pos+2) = 't' && String.unsafe_get s (pos+3) = 'a' then ( - 0 + 3 ) else ( -1 @@ -14659,7 +14724,7 @@ let read_scan_request = ( ) | 13 -> ( if String.unsafe_get s pos = 's' && String.unsafe_get s (pos+1) = 'c' && String.unsafe_get s (pos+2) = 'a' && String.unsafe_get s (pos+3) = 'n' && String.unsafe_get s (pos+4) = '_' && String.unsafe_get s (pos+5) = 'm' && String.unsafe_get s (pos+6) = 'e' && String.unsafe_get s (pos+7) = 't' && String.unsafe_get s (pos+8) = 'a' && String.unsafe_get s (pos+9) = 'd' && String.unsafe_get s (pos+10) = 'a' && String.unsafe_get s (pos+11) = 't' && String.unsafe_get s (pos+12) = 'a' then ( - 3 + 1 ) else ( -1 @@ -14675,7 +14740,7 @@ let read_scan_request = ( ) | 16 -> ( if String.unsafe_get s pos = 'p' && String.unsafe_get s (pos+1) = 'r' && String.unsafe_get s (pos+2) = 'o' && String.unsafe_get s (pos+3) = 'j' && String.unsafe_get s (pos+4) = 'e' && String.unsafe_get s (pos+5) = 'c' && String.unsafe_get s (pos+6) = 't' && String.unsafe_get s (pos+7) = '_' && String.unsafe_get s (pos+8) = 'm' && String.unsafe_get s (pos+9) = 'e' && String.unsafe_get s (pos+10) = 't' && String.unsafe_get s (pos+11) = 'a' && String.unsafe_get s (pos+12) = 'd' && String.unsafe_get s (pos+13) = 'a' && String.unsafe_get s (pos+14) = 't' && String.unsafe_get s (pos+15) = 'a' then ( - 1 + 0 ) else ( -1 @@ -14690,14 +14755,6 @@ let read_scan_request = ( ( match i with | 0 -> - field_meta := ( - Some ( - ( - read_raw_json - ) p lb - ) - ); - | 1 -> field_project_metadata := ( Some ( ( @@ -14705,15 +14762,7 @@ let read_scan_request = ( ) p lb ) ); - | 2 -> - field_project_config := ( - Some ( - ( - read_ci_config_from_repo - ) p lb - ) - ); - | 3 -> + | 1 -> field_scan_metadata := ( Some ( ( @@ -14721,6 +14770,26 @@ let read_scan_request = ( ) p lb ) ); + | 2 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_project_config := ( + Some ( + ( + read_ci_config_from_repo + ) p lb + ) + ); + ) + | 3 -> + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_meta := ( + Some ( + ( + read_raw_json + ) p lb + ) + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -14730,10 +14799,10 @@ let read_scan_request = ( with Yojson.End_of_object -> ( ( { - meta = (match !field_meta with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "meta"); project_metadata = (match !field_project_metadata with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "project_metadata"); - project_config = (match !field_project_config with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "project_config"); scan_metadata = (match !field_scan_metadata with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "scan_metadata"); + project_config = !field_project_config; + meta = !field_meta; } : scan_request) ) diff --git a/semgrep_output_v1_j.mli b/semgrep_output_v1_j.mli index a11a9e54..b2cd7949 100644 --- a/semgrep_output_v1_j.mli +++ b/semgrep_output_v1_j.mli @@ -385,10 +385,10 @@ type ci_config_from_repo = Semgrep_output_v1_t.ci_config_from_repo = { } type scan_request = Semgrep_output_v1_t.scan_request = { - meta: raw_json; project_metadata: project_metadata; - project_config: ci_config_from_repo; - scan_metadata: scan_metadata + scan_metadata: scan_metadata; + project_config: ci_config_from_repo option; + meta: raw_json option } type ci_env = Semgrep_output_v1_t.ci_env From 2b784871a18d66a07b54fdc4c545089d11fbc501 Mon Sep 17 00:00:00 2001 From: pad Date: Thu, 28 Nov 2024 09:48:27 +0100 Subject: [PATCH 5/5] more --- semgrep_output_v1.atd | 6 +++++- semgrep_output_v1.jsonschema | 2 +- semgrep_output_v1.proto | 2 +- semgrep_output_v1.py | 4 ++-- semgrep_output_v1.ts | 4 ++-- semgrep_output_v1_j.ml | 20 ++++++++++---------- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index d74a99e2..4870c69a 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -107,8 +107,12 @@ type position col: int; (* starts from 1 *) (* Byte position from the beginning of the file, starts at 0. * OCaml code sets it correctly. Python code sets it to a dummy value (-1). + * This uses '~' because pysemgrep < 1.30? was *producing* positions without + * offset sometimes, and we want the backend to still *consume* such positions. + * Note that pysemgrep 1.97 was still producing dummy positions without + * an offset so we might need this ~offset longer than expected? *) - offset: int; + ~offset: int; } (* a.k.a range *) diff --git a/semgrep_output_v1.jsonschema b/semgrep_output_v1.jsonschema index f3d1badf..71ecccc1 100644 --- a/semgrep_output_v1.jsonschema +++ b/semgrep_output_v1.jsonschema @@ -44,7 +44,7 @@ "version": { "type": "string" }, "position": { "type": "object", - "required": [ "line", "col", "offset" ], + "required": [ "line", "col" ], "properties": { "line": { "type": "integer" }, "col": { "type": "integer" }, diff --git a/semgrep_output_v1.proto b/semgrep_output_v1.proto index 287cdf82..94121246 100644 --- a/semgrep_output_v1.proto +++ b/semgrep_output_v1.proto @@ -1,6 +1,6 @@ // Generated by jsonschema2protobuf. DO NOT EDIT! // Source file: semgrep_output_v1.jsonschema -// Source file sha256 digest: 0c194c187f09f172cfec5d6fe181725ee6e5c4cd5463f91a6d0843bc61e61149 +// Source file sha256 digest: 041691dfc4c36d3cff065e3dd6973c16cd2b6e9704138c38bb5d75c370eec5cb syntax = "proto3"; diff --git a/semgrep_output_v1.py b/semgrep_output_v1.py index afbcb68d..e14a200a 100644 --- a/semgrep_output_v1.py +++ b/semgrep_output_v1.py @@ -787,7 +787,7 @@ class Position: line: int col: int - offset: int + offset: int = field(default_factory=lambda: 0) @classmethod def from_json(cls, x: Any) -> 'Position': @@ -795,7 +795,7 @@ def from_json(cls, x: Any) -> 'Position': return cls( line=_atd_read_int(x['line']) if 'line' in x else _atd_missing_json_field('Position', 'line'), col=_atd_read_int(x['col']) if 'col' in x else _atd_missing_json_field('Position', 'col'), - offset=_atd_read_int(x['offset']) if 'offset' in x else _atd_missing_json_field('Position', 'offset'), + offset=_atd_read_int(x['offset']) if 'offset' in x else 0, ) else: _atd_bad_json('Position', x) diff --git a/semgrep_output_v1.ts b/semgrep_output_v1.ts index 8ab7381d..05a57d17 100644 --- a/semgrep_output_v1.ts +++ b/semgrep_output_v1.ts @@ -1039,7 +1039,7 @@ export function writePosition(x: Position, context: any = x): any { return { 'line': _atd_write_required_field('Position', 'line', _atd_write_int, x.line, x), 'col': _atd_write_required_field('Position', 'col', _atd_write_int, x.col, x), - 'offset': _atd_write_required_field('Position', 'offset', _atd_write_int, x.offset, x), + 'offset': _atd_write_field_with_default(_atd_write_int, 0, x.offset, x), }; } @@ -1047,7 +1047,7 @@ export function readPosition(x: any, context: any = x): Position { return { line: _atd_read_required_field('Position', 'line', _atd_read_int, x['line'], x), col: _atd_read_required_field('Position', 'col', _atd_read_int, x['col'], x), - offset: _atd_read_required_field('Position', 'offset', _atd_read_int, x['offset'], x), + offset: _atd_read_field_with_default(_atd_read_int, 0, x['offset'], x), }; } diff --git a/semgrep_output_v1_j.ml b/semgrep_output_v1_j.ml index b47851cd..561dba12 100644 --- a/semgrep_output_v1_j.ml +++ b/semgrep_output_v1_j.ml @@ -1369,7 +1369,7 @@ let read_position = ( Yojson.Safe.read_lcurl p lb; let field_line = ref (None) in let field_col = ref (None) in - let field_offset = ref (None) in + let field_offset = ref (0) in try Yojson.Safe.read_space p lb; Yojson.Safe.read_object_end lb; @@ -1428,13 +1428,13 @@ let read_position = ( ) ); | 2 -> - field_offset := ( - Some ( + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_offset := ( ( Atdgen_runtime.Oj_run.read_int ) p lb - ) - ); + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -1497,13 +1497,13 @@ let read_position = ( ) ); | 2 -> - field_offset := ( - Some ( + if not (Yojson.Safe.read_null_if_possible p lb) then ( + field_offset := ( ( Atdgen_runtime.Oj_run.read_int ) p lb - ) - ); + ); + ) | _ -> ( Yojson.Safe.skip_json p lb ) @@ -1515,7 +1515,7 @@ let read_position = ( { line = (match !field_line with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "line"); col = (match !field_col with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "col"); - offset = (match !field_offset with Some x -> x | None -> Atdgen_runtime.Oj_run.missing_field p "offset"); + offset = !field_offset; } : position) )