Skip to content

Commit b35746b

Browse files
authored
feat(sca): new SCA stats schema (#313)
# Context The [existing "bytes parsed" metric](https://metabase.corp.semgrep.dev/dashboard/272-ssc-adoption-dashboard?tab=147-summary) we are currently relying on provides a limited perspective on customer SSC adoption, as it aggregates usage across languages without fully capturing which languages are meaningfully used. We want to augment the stats schema to improve metrics around Java dependency detection, lockfile-less scanning, and Python requirements identification. - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend still needs to be able to *consume* data generated by Semgrep 1.17.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
1 parent a95d339 commit b35746b

7 files changed

+5154
-3447
lines changed

semgrep_output_v1.atd

+44
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,50 @@ type ci_scan_complete_stats = {
16871687
<python repr="dict">
16881688
<ts repr="map">
16891689
option;
1690+
1691+
(* since 1.98.0 *)
1692+
(* In collaboration with the Data Science team, it was suggested
1693+
* that we start to group stats by product for organizational purposes.
1694+
*
1695+
* This field will only be defined for SCA scans.
1696+
*)
1697+
?supply_chain_stats: supply_chain_stats option;
1698+
}
1699+
1700+
type resolution_method <ocaml attr="deriving show"> <python decorator="dataclass(frozen=True, order=True)"> = [
1701+
| LockfileParsing
1702+
| DynamicResolution
1703+
]
1704+
1705+
type dependency_resolution_stats = {
1706+
resolution_method: resolution_method;
1707+
dependency_count: int;
1708+
ecosystem: ecosystem;
1709+
}
1710+
1711+
type dependency_source_file_kind <ocaml attr="deriving show"> <python decorator="dataclass(frozen=True)"> = [
1712+
| Lockfile of lockfile_kind
1713+
| Manifest of manifest_kind
1714+
]
1715+
1716+
type dependency_source_file = {
1717+
kind: dependency_source_file_kind;
1718+
path: fpath;
1719+
}
1720+
1721+
type subproject_stats = {
1722+
(* The `subproject_id` is derived as a stable hash of the sorted paths of `dependency_source_file`s.
1723+
Any change to the set of dependency source paths (a file added, removed, or moved/renamed) results in a new
1724+
`subproject_id`, as different dependency sources indicate a different subproject context. *)
1725+
subproject_id: string;
1726+
(* Files used to determine the subproject's dependencies (lockfiles, manifest files, etc) *)
1727+
dependency_sources: dependency_source_file list;
1728+
(* Results of dependency resolution; absent (None) if resolution failed *)
1729+
?resolved_stats: dependency_resolution_stats option;
1730+
}
1731+
1732+
type supply_chain_stats = {
1733+
subprojects_stats: subproject_stats list;
16901734
}
16911735

16921736
type parsing_stats = {

semgrep_output_v1.jsonschema

+69
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

semgrep_output_v1.proto

+23-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)