Skip to content

Commit 3171f20

Browse files
authored
Add sca_info in core_match_extra (#328)
So we can generate it from semgrep-core and display it in osemgrep. Test plan: see related PR in semgrep-pro. - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend needs to still be able to *consume* data generated by Semgrep 1.50.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades Note that the types related to the semgrep-core JSON output or the semgrep-core RPC do not need to be backward compatible! Co-authored-by: pad <[email protected]>
1 parent 8ffc890 commit 3171f20

7 files changed

+12526
-12481
lines changed

semgrep_output_v1.atd

+28-43
Original file line numberDiff line numberDiff line change
@@ -274,20 +274,16 @@ type cli_match_extra = {
274274
?metavars: metavars option;
275275

276276
(* Those fields are derived from the rule but the metavariables
277-
* they contain have been expanded to their concrete value.
278-
*)
277+
* they contain have been expanded to their concrete value. *)
279278
message: string;
279+
280280
(* If present, semgrep was able to compute a string that should be
281281
* inserted in place of the text in the matched range in order to fix the
282-
* finding.
283-
* Note that this is the result of applying both the fix: or fix_regex:
284-
* in a rule.
285-
*)
282+
* finding. Note that this is the result of applying both the fix: or
283+
* fix_regex: in a rule. *)
286284
?fix: string option;
287-
288285
(* TODO: done with monkey patching right now in the Python code,
289-
* and seems to be used only when sending findings to the backend.
290-
*)
286+
* and seems to be used only when sending findings to the backend. *)
291287
?fixed_lines: string list option;
292288

293289
(* fields coming from the rule *)
@@ -298,20 +294,18 @@ type cli_match_extra = {
298294
fingerprint: string;
299295
lines: string;
300296

301-
(* extra fields *)
297+
(* for nosemgrep *)
302298
?is_ignored: bool option;
303299

304300
(* EXPERIMENTAL: added by dependency_aware code *)
305-
?sca_info: sca_info option;
306-
301+
?sca_info: sca_match option;
307302
(* EXPERIMENTAL: If present indicates the status of postprocessor validation.
308303
* This field not being present should be equivalent to No_validator.
309304
* Added in semgrep 1.37.0 *)
310305
?validation_state: validation_state option;
311306
(* EXPERIMENTAL: added by secrets post-processing & historical scanning code
312307
* Since 1.60.0. *)
313308
?historical_info: historical_info option;
314-
315309
(* EXPERIMENTAL: For now, present only for taint findings. May be extended to
316310
* others later on. *)
317311
?dataflow_trace: match_dataflow_trace option;
@@ -530,41 +524,40 @@ type transitivity
530524
]
531525

532526
(* part of cli_match_extra *)
533-
type sca_info = {
527+
type sca_match = {
534528
reachable: bool;
535529
reachability_rule: bool;
536530
sca_finding_schema: int;
537531
dependency_match: dependency_match;
538532
}
539533

540534
type dependency_match = {
541-
dependency_pattern: dependency_pattern;
535+
dependency_pattern: sca_pattern;
542536
found_dependency: found_dependency;
543537
lockfile: fpath;
544538
}
545539

546-
type dependency_pattern = {
540+
type sca_pattern = {
547541
ecosystem: ecosystem;
548542
package: string;
549543
semver_range: string;
550544
}
551545

546+
(* alt: sca_dependency? *)
552547
type found_dependency = {
553548
package: string;
554549
version: string;
555550
ecosystem: ecosystem;
551+
(* ??? *)
556552
allowed_hashes: (string * string list) list
557-
<json repr="object">
558-
<python repr="dict">
559-
<ts repr="map">;
553+
<json repr="object"> <python repr="dict"> <ts repr="map">;
560554
?resolved_url: string option;
561555
transitivity: transitivity;
562556
(* Path to the manifest file that defines the project containing this
563557
* dependency. Examples: package.json, nested/folder/pom.xml
564558
*)
565559
?manifest_path: fpath option;
566560
(* Path to the lockfile that contains this dependency.
567-
*
568561
* Examples: package-lock.json, nested/folder/requirements.txt, go.mod
569562
* Since 1.87.0
570563
*)
@@ -1476,12 +1469,11 @@ type finding = {
14761469

14771470
?fixed_lines: string list option;
14781471

1479-
?sca_info: sca_info option;
1480-
(* Note that this contains code!
1481-
* TODO? do we need to send this to the App?
1482-
*)
1472+
(* added in ?? *)
1473+
?sca_info: sca_match option;
1474+
(* Note that this contains code! TODO? do we need to send this to the App? *)
14831475
?dataflow_trace: match_dataflow_trace option;
1484-
(* Added in semgrep 1.39.0 see comments in cli_match_extra. *)
1476+
(* Added in semgrep 1.39.0 see comments in cli_match_extra *)
14851477
?validation_state: validation_state option;
14861478
(* Added in semgrep 1.65.0 see comments in cli_match_extra *)
14871479
?historical_info: historical_info option;
@@ -1801,7 +1793,6 @@ type core_output = {
18011793
results: core_match list;
18021794
(* errors are guaranteed to be duplicate free; see also Report.ml *)
18031795
errors: core_error list;
1804-
18051796
inherit cli_output_extra;
18061797
}
18071798

@@ -1812,32 +1803,26 @@ type core_match <python decorator="dataclass(frozen=True)"> = {
18121803
extra: core_match_extra;
18131804
}
18141805

1815-
(* TODO: try to make it as close as possible to 'cli_match_extra' below *)
1806+
(* TODO: try to make it as close as possible to 'cli_match_extra' below
1807+
* See the corresponding comment in cli_match_extra for more information
1808+
* about the fields below.
1809+
*)
18161810
type core_match_extra <python decorator="dataclass(frozen=True)"> = {
1811+
metavars: metavars;
1812+
engine_kind: engine_of_finding;
1813+
is_ignored: bool;
18171814
(* These fields generally come from the rule, but may be set here if they're
18181815
* being overridden for that particular finding. This would currently occur
1819-
* for rule with a validator for secrets, depending on what the valdiator
1816+
* for rule with a validator for secrets, depending on what the validator
18201817
* might match, but could be expanded in the future.
1821-
*
1822-
* Added in semgrep 1.44.0 *)
1818+
*)
18231819
?message: string option;
18241820
?metadata: raw_json option;
18251821
?severity: match_severity option;
1826-
1827-
metavars: metavars;
1828-
(* old: was called rendered_fix *)
18291822
?fix: string option;
1830-
18311823
?dataflow_trace: match_dataflow_trace option;
1832-
engine_kind: engine_of_finding;
1833-
(* for nosemgrep *)
1834-
is_ignored: bool;
1835-
(* If present indicates the status of postprocessor validation. This field
1836-
* not being present should be equivalent to No_validator.
1837-
* Added in semgrep 1.37.0 *)
1824+
?sca_match: sca_match option;
18381825
?validation_state : validation_state option;
1839-
(* EXPERIMENTAL: added by secrets post-processing & historical scanning code.
1840-
* Since 1.63.0. *)
18411826
?historical_info: historical_info option;
18421827
(* Escape hatch to pass untyped info from semgrep-core to the semgrep output.
18431828
* Useful for quick experiments, especially when combined with semgrep
@@ -1860,7 +1845,7 @@ type core_error <python decorator="dataclass(frozen=True)"> = {
18601845
?details: string option;
18611846
?location: location option;
18621847
?rule_id: rule_id option;
1863-
}
1848+
}
18641849

18651850
(*****************************************************************************)
18661851
(* semgrep-core JSON input via -targets (from pysemgrep) *)

semgrep_output_v1.jsonschema

+9-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

semgrep_output_v1.proto

+10-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)