21
21
* semgrep_metrics.atd
22
22
* - The parsing stats of semgrep-core -parsing_stats -json have their own
23
23
* Parsing_stats.atd
24
- * - The schema for the generic AST dump is in AST_generic_v1.atd
25
24
* For the definition of the Semgrep input (the rules), see rule_schema_v2.atd
26
25
*
27
26
* This file has the _v1 suffix to explicitly represent the
@@ -254,7 +253,7 @@ type product
254
253
<python decorator="dataclass(frozen=True)"> =
255
254
[
256
255
| SAST (* a.k.a. Code *) <json name="sast">
257
- | SCA <json name="sca">
256
+ | SCA (* a.k.a. SSC *) <json name="sca">
258
257
| Secrets <json name="secrets">
259
258
]
260
259
@@ -533,8 +532,8 @@ type transitivity
533
532
*)
534
533
| Transitive <json name="transitive">
535
534
(* If there is insufficient information to determine the transitivity,
536
- such as a requirements.txt file without a requirements.in manifest,
537
- we leave it Unknown.
535
+ * such as a requirements.txt file without a requirements.in manifest,
536
+ * we leave it Unknown.
538
537
*)
539
538
| Unknown <json name="unknown">
540
539
]
@@ -549,7 +548,7 @@ type sca_match = {
549
548
dependency_match: dependency_match;
550
549
(* TODO: deprecate, we should use sca_match_kind instead *)
551
550
reachable: bool;
552
- (* EXPERIMENTAL since 1.08.0 *)
551
+ (* EXPERIMENTAL since 1.108.0 *)
553
552
?kind: sca_match_kind option;
554
553
}
555
554
@@ -768,7 +767,7 @@ type error_type
768
767
| PatternParseError0 <json name="Pattern parse error">
769
768
| IncompatibleRule0 <json name="Incompatible rule">
770
769
(* since semgrep 1.94.0 *)
771
- | DependencyResolutionError of resolution_error
770
+ | DependencyResolutionError of resolution_error_kind
772
771
] <ocaml repr="classic">
773
772
774
773
type incompatible_rule
@@ -867,11 +866,11 @@ type skip_reason <ocaml attr="deriving show"> = [
867
866
| Irrelevant_rule <json name="irrelevant_rule">
868
867
| Too_many_matches <json name="too_many_matches">
869
868
(* New in osemgrep *)
870
- | Gitignore_patterns_match (* TODO: use JSON lowercase for consistency *)
869
+ | Gitignore_patterns_match
871
870
(* since 1.40.0. These were always ignored, but not shown in the skip report *)
872
- | Dotfile (* TODO: use JSON lowercase for consistency *)
871
+ | Dotfile
873
872
(* since 1.44.0 *)
874
- | Nonexistent_file (* TODO: use JSON lowercase for consistency *)
873
+ | Nonexistent_file
875
874
(* since 1.94.0 *)
876
875
| Insufficient_permissions <json name="insufficient_permissions">
877
876
] <ocaml repr="classic">
@@ -981,7 +980,6 @@ type target_times = {
981
980
run_time: float;
982
981
}
983
982
984
-
985
983
(*****************************************************************************)
986
984
(* Final 'semgrep scan' output *)
987
985
(*****************************************************************************)
@@ -1056,7 +1054,6 @@ type tests_result = {
1056
1054
}
1057
1055
1058
1056
type checks = {
1059
- (* would like to use fpath *)
1060
1057
checks: (string (* rule_id *) * rule_result) list <json repr="object">;
1061
1058
}
1062
1059
@@ -1218,8 +1215,8 @@ type killing_parent = {
1218
1215
* - /api/agent/scans/<scan_id>/complete when done, with the exit code and a
1219
1216
* few more information and response with app_block_override and reason
1220
1217
*
1221
- * alt: we could move all of this in a separate semgrep_posts_v1.atd file
1222
- * or semgrep_webapp_v1.atd
1218
+ * TODO: we should move all of this into a separate semgrep_backend.atd
1219
+ * (but need a proper module system for ATD first)
1223
1220
*)
1224
1221
1225
1222
(* ----------------------------- *)
@@ -1230,11 +1227,12 @@ type features = {
1230
1227
~autofix: bool;
1231
1228
~deepsemgrep: bool;
1232
1229
~dependency_query: bool;
1230
+ (* a.k.a. dependency path *)
1233
1231
~path_to_transitivity: bool;
1234
- (* normally we resolve dependencies for changed subprojects only in diff scans. This flag
1235
- * causes all subprojects to be resolved in diff scans *)
1232
+ (* normally we resolve dependencies for changed subprojects only in diff
1233
+ * scans. This flag causes all subprojects to be resolved in diff scans *)
1236
1234
~scan_all_deps_in_diff_scan: bool;
1237
- (* Whether to collect "symbol analysis" information from the repo being scanned
1235
+ (* Whether to collect "symbol analysis" info from the repo being scanned
1238
1236
See https://www.notion.so/semgrep/Semgrep-Code-Reconnaissance-Toolbox-18a3009241a880f2a439eed6b2cffe66?pvs=4
1239
1237
*)
1240
1238
~symbol_analysis: bool;
@@ -1260,6 +1258,7 @@ type triage_ignored = {
1260
1258
* Note that the version of the CLI is sent to the WebApp in
1261
1259
* project_metadata so the backend has all the necessary information to
1262
1260
* send back an appropriate action depending on the CLI version.
1261
+ * TODO: only osemgrep handles that right now
1263
1262
*)
1264
1263
type action = [
1265
1264
| Message of string
@@ -1295,7 +1294,10 @@ type scan_info = {
1295
1294
1296
1295
(* config specific to the scan *)
1297
1296
type scan_configuration = {
1298
- rules: raw_json; (* can we type this better *)
1297
+ (* Rules sent from the backend. Note that those rules are in JSON
1298
+ * form not YAML (which led to some speedup in pysemgrep)
1299
+ * TODO? can we type this better *)
1300
+ rules: raw_json;
1299
1301
inherit triage_ignored;
1300
1302
}
1301
1303
@@ -1322,9 +1324,8 @@ type product_ignored_files = (product * glob list) list
1322
1324
* "keys must be strings" error *)
1323
1325
<python repr="dict"> <ts repr="map">
1324
1326
1325
- (* configuration for scanning version control history,
1326
- * e.g., looking back at past git commits for committed credentials which may
1327
- * have been removed *)
1327
+ (* configuration for scanning version control history, e.g., looking back at
1328
+ * past git commits for committed credentials which may have been removed *)
1328
1329
type historical_configuration = {
1329
1330
enabled: bool;
1330
1331
?lookback_days: int option;
@@ -1425,9 +1426,10 @@ type scan_metadata = {
1425
1426
cli_version: version;
1426
1427
unique_id: uuid; (* client generated uuid for the scan *)
1427
1428
requested_products: product list;
1428
- ~dry_run: bool; (* from 1.47.0 *)
1429
- (* since 1.96.0 *)
1429
+ (* since 1.47.0 *)
1430
+ ~dry_run: bool;
1430
1431
(* unique id associated with the scan in Semgrep Managed Scanning *)
1432
+ (* since 1.96.0 *)
1431
1433
?sms_scan_id: string option;
1432
1434
}
1433
1435
@@ -1466,6 +1468,7 @@ type finding = {
1466
1468
message: string;
1467
1469
(* int|string until minimum version exceeds 1.32.0. After 1.32.0
1468
1470
* we're always using an int.
1471
+ * TODO: switch to int now that minimum version supported is 1.50.0
1469
1472
* TODO: should reuse match_severity instead of using an int here.
1470
1473
* This is what pysemgrep is currently using:
1471
1474
* Error = 2, Warning = 1, Experiment = 4, otherwise 0
@@ -1542,7 +1545,7 @@ type ci_scan_results = {
1542
1545
?contributions: contributions option;
1543
1546
(* since semgrep 1.38.0 *)
1544
1547
(* this data was originally sent to /complete, but we want to start sending
1545
- * it /results *)
1548
+ * it to /results *)
1546
1549
?dependencies: ci_scan_dependencies option;
1547
1550
}
1548
1551
@@ -1585,6 +1588,7 @@ type ci_scan_complete = {
1585
1588
stats: ci_scan_complete_stats;
1586
1589
(* TODO: remove when min version is 1.38.0 *)
1587
1590
?dependencies: ci_scan_dependencies option;
1591
+ (* TODO: move the errors in ci_scan_complete_stats.errors instead *)
1588
1592
?dependency_parser_errors: dependency_parser_error list option;
1589
1593
(* since 1.31.0 *)
1590
1594
?task_id: string option;
@@ -1596,10 +1600,12 @@ type ci_scan_complete_stats = {
1596
1600
errors: cli_error list;
1597
1601
total_time: float;
1598
1602
1603
+ (* ?? *)
1599
1604
unsupported_exts: (string * int) list
1600
1605
<json repr="object">
1601
1606
<python repr="dict">
1602
1607
<ts repr="map">;
1608
+ (* ?? *)
1603
1609
lockfile_scan_info: (string * int) list
1604
1610
<json repr="object">
1605
1611
<python repr="dict">
@@ -1657,11 +1663,11 @@ type ci_scan_complete_response <ocaml attr="deriving show"> = {
1657
1663
(* ----------------------------- *)
1658
1664
(* SCA part 2 *)
1659
1665
(* ----------------------------- *)
1666
+ (* key is ?? lockfile? *)
1660
1667
type ci_scan_dependencies = (string * found_dependency list) list
1661
- <json repr="object">
1662
- <python repr="dict">
1663
- <ts repr="map">
1668
+ <json repr="object"> <python repr="dict"> <ts repr="map">
1664
1669
1670
+ (* TODO: get rid of; should use cli_error with error_type ScaParseError *)
1665
1671
type dependency_parser_error = {
1666
1672
path: fpath;
1667
1673
parser: sca_parser_name;
@@ -1674,33 +1680,34 @@ type dependency_parser_error = {
1674
1680
?text: string option;
1675
1681
}
1676
1682
1677
- (* json names are to maintain backwards compatibility with the python enum it
1678
- * is replacing
1679
- * TODO: use <ocaml repr="classic">
1683
+ (* JSON names are to maintain backwards compatibility with the python enum it
1684
+ * is replacing. The P prefix (for parser) is to avoid ambiguity with similar
1685
+ * constructor names in the manifest and ecosystem types.
1680
1686
*)
1681
- type sca_parser_name = [
1682
- | Gemfile_lock <json name="gemfile_lock">
1683
- | Go_mod <json name="go_mod">
1684
- | Go_sum <json name="go_sum">
1685
- | Gradle_lockfile <json name="gradle_lockfile">
1686
- | Gradle_build <json name="gradle_build">
1687
- | Jsondoc <json name="jsondoc">
1688
- | Pipfile <json name="pipfile">
1689
- | Pnpm_lock <json name="pnpm_lock">
1690
- | Poetry_lock <json name="poetry_lock">
1691
- | Pyproject_toml <json name="pyproject_toml">
1692
- | Requirements <json name="requirements">
1693
- | Yarn_1 <json name="yarn_1">
1694
- | Yarn_2 <json name="yarn_2">
1695
- | Pomtree <json name="pomtree">
1696
- | Cargo_parser <json name="cargo">
1697
- | Composer_lock <json name="composer_lock">
1698
- | Pubspec_lock <json name="pubspec_lock">
1699
- | Package_swift <json name="package_swift">
1700
- | Podfile_lock <json name="podfile_lock">
1701
- | Package_resolved <json name="package_resolved">
1702
- | Mix_lock <json name="mix_lock">
1703
- ]
1687
+ type sca_parser_name <ocaml attr="deriving show"> = [
1688
+ | PGemfile_lock <json name="gemfile_lock">
1689
+ | PGo_mod <json name="go_mod">
1690
+ | PGo_sum <json name="go_sum">
1691
+ | PGradle_lockfile <json name="gradle_lockfile">
1692
+ | PGradle_build <json name="gradle_build">
1693
+ | PJsondoc <json name="jsondoc">
1694
+ | PPipfile <json name="pipfile">
1695
+ | PPnpm_lock <json name="pnpm_lock">
1696
+ | PPoetry_lock <json name="poetry_lock">
1697
+ | PPyproject_toml <json name="pyproject_toml">
1698
+ | PRequirements <json name="requirements">
1699
+ | PYarn_1 <json name="yarn_1">
1700
+ | PYarn_2 <json name="yarn_2">
1701
+ | PPomtree <json name="pomtree">
1702
+ | PCargo_parser <json name="cargo">
1703
+ | PComposer_lock <json name="composer_lock">
1704
+ | PPubspec_lock <json name="pubspec_lock">
1705
+ | PPackage_swift <json name="package_swift">
1706
+ | PPodfile_lock <json name="podfile_lock">
1707
+ | PPackage_resolved <json name="package_resolved">
1708
+ | PMix_lock <json name="mix_lock">
1709
+ ] <ocaml repr="classic">
1710
+
1704
1711
1705
1712
type supply_chain_stats = {
1706
1713
subprojects_stats: subproject_stats list;
@@ -1744,7 +1751,9 @@ type resolution_method
1744
1751
<ocaml attr="deriving show">
1745
1752
<python decorator="dataclass(frozen=True, order=True)"> =
1746
1753
[
1754
+ (* we parsed a lockfile that was already included in the repository *)
1747
1755
| LockfileParsing
1756
+ (* we communicated with the package manager to resolve dependencies *)
1748
1757
| DynamicResolution
1749
1758
]
1750
1759
@@ -1765,8 +1774,8 @@ type ci_scan_failure = {
1765
1774
(* Response by the backend to the CLI to the POST api/agent/deployments/current
1766
1775
* Some of the information in deployment_config is now returned
1767
1776
* directly in scan_response (e.g., the deployment_name)
1768
- * TODO: deprecate this endpoint as it is now used only in 'semgrep login' and
1769
- * in 'semgrep show whoami' to just check whether the token is valid.
1777
+ * TODO? deprecate this endpoint as it is now used only in 'semgrep login' and
1778
+ * in 'semgrep show whoami' to just check whether the token is valid?
1770
1779
*)
1771
1780
type deployment_config <ocaml attr="deriving show"> = {
1772
1781
id : int;
@@ -1901,7 +1910,7 @@ type core_match <python decorator="dataclass(frozen=True)"> = {
1901
1910
extra: core_match_extra;
1902
1911
}
1903
1912
1904
- (* TODO: try to make it as close as possible to 'cli_match_extra' below
1913
+ (* TODO: try to make it as close as possible to 'cli_match_extra'.
1905
1914
* See the corresponding comment in cli_match_extra for more information
1906
1915
* about the fields below.
1907
1916
*)
@@ -1930,9 +1939,9 @@ type core_match_extra <python decorator="dataclass(frozen=True)"> = {
1930
1939
}
1931
1940
1932
1941
(* See Semgrep_error_code.ml *)
1933
- (* TODO: try to make it as close as possible to 'cli_error' above , possibly
1942
+ (* TODO: try to make it as close as possible to 'cli_error', possibly
1934
1943
* extending cli_error with more fields (but those fields must be optional
1935
- * to remain backward compatible
1944
+ * to remain backward compatible)
1936
1945
* LATER: use a proper variant in error_type so we would need less
1937
1946
* of those ?xxx types below (like a ParseError should always have a location)
1938
1947
*)
@@ -2006,7 +2015,7 @@ type analyzer <ocaml attr="deriving show"> = string wrap <ocaml module="Analyzer
2006
2015
(* A target can either be a traditional code target (now with optional
2007
2016
associated lockfile) or it can be a lockfile target, which will be used to
2008
2017
generate lockfile-only findings.
2009
- Currently *ALL TARGETS FROM OSEMGREP AND PYSEMGREP ARE CODETARGETS*
2018
+ Currently *ALL TARGETS FROM PYSEMGREP ARE CODETARGETS*
2010
2019
coupling: with src/target/Target.mli
2011
2020
*)
2012
2021
type target <ocaml attr="deriving show"> = [
@@ -2239,15 +2248,34 @@ type dependency_source
2239
2248
| ManifestLockfileDependencySource of (manifest * lockfile)
2240
2249
] <ocaml repr="classic">
2241
2250
2242
- type resolution_error
2251
+ (* alt: sca_error_kind *)
2252
+ type resolution_error_kind
2243
2253
<ocaml attr="deriving show">
2244
2254
<python decorator="dataclass(frozen=True)"> =
2245
2255
[
2246
2256
| UnsupportedManifest
2247
2257
| MissingRequirement of string
2248
2258
| ResolutionCmdFailed of resolution_cmd_failed
2259
+ (* when we produce some dependency list in lockfileless scanning (by talking
2260
+ * to the package manager) but fail to parse it correctly *)
2249
2261
| ParseDependenciesFailed of string
2250
- ]
2262
+ (* a lockfile parser failed
2263
+ * since semgrep 1.109.0 (to replace dependency_parser_error) *)
2264
+ | ScaParseError of sca_parser_name
2265
+ ] <ocaml repr="classic">
2266
+
2267
+ (* used only from pysemgrep for now
2268
+ * TODO? we should merge dependency_{parser,resolution}_error with cli_error
2269
+ *)
2270
+ type sca_resolution_error = {
2271
+ type_: resolution_error_kind;
2272
+ dependency_source_file: fpath;
2273
+ }
2274
+ type sca_error = [
2275
+ | SCAParse of dependency_parser_error
2276
+ | SCAResol of sca_resolution_error
2277
+ ] <ocaml repr="classic">
2278
+
2251
2279
2252
2280
type resolution_cmd_failed
2253
2281
<ocaml attr="deriving show">
@@ -2262,8 +2290,8 @@ type resolution_cmd_failed
2262
2290
* lockfile parsing might partially fail but recover and still produce results)
2263
2291
*)
2264
2292
type resolution_result = [
2265
- | ResolutionOk of (found_dependency list * resolution_error list)
2266
- | ResolutionError of resolution_error list
2293
+ | ResolutionOk of (found_dependency list * resolution_error_kind list)
2294
+ | ResolutionError of resolution_error_kind list
2267
2295
]
2268
2296
2269
2297
type transitive_finding = {
0 commit comments