From 0ba2bca4ab732b36dcaf4928c39068678c255e68 Mon Sep 17 00:00:00 2001 From: Yoann Padioleau Date: Fri, 10 Nov 2023 14:25:33 +0100 Subject: [PATCH] New extract, in rule_schema_v2.atd (#191) test plan: see tests in related PR in semgrep - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend still needs to be able to *consume* data generated by Semgrep 1.17.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades --- rule_schema_v2.atd | 60 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/rule_schema_v2.atd b/rule_schema_v2.atd index aec7a262..4ba35c80 100644 --- a/rule_schema_v2.atd +++ b/rule_schema_v2.atd @@ -31,7 +31,6 @@ * also the old syntax. * * TODO: - * - extract * - secrets * - steps (but not join) * - new metavariable types @@ -66,35 +65,43 @@ type rule = { message: string; severity: severity; - (* TODO: selector vs analyzer *) + (* later: selector vs analyzer of Martin *) languages: language list; (* CHECK: exactly one of those fields must be set *) ?match_ : formula option; ?taint: taint option; - ?extract: extract option; - (* TODO: steps, secrets *) - (* TODO? product: product; *) + (* TODO: steps:, secrets: *) - (* can work with match/taint/..., maybe not extract *) + (* work with match: (and in theory also with taint: ) *) ?project_depends_on : project_depends_on option; + (* work with match: (and in theory also with taint: ) + * + * alt: message:/severity: could be made optional when extract: is set, + * but it's annoying to change those types just for extract. Moreover, + * users can easily put severity: INFO and a fake message:, + * and at least they can easily test the matching part of the rule + * by removing the extract and running it like a regular rule. 
+ *) + ?extract: extract option; - (* alt: later: could be replaced by a pattern-filename: *) + (* alt: later: could be replaced by a 'filename:' in formula *) ?paths: paths option; ?fix: string option; ?fix_regex: fix_regex option; - (* TODO? impose more constraints on metadata? standard fields? *) - ?metadata: raw_json option; + (* later: equivalences: ... *) ?options: rule_options option; - ?version: version option; + (* TODO? impose more constraints on metadata? standard fields? + * TODO? add also a product: product; ? + *) + ?metadata: raw_json option; + ?min_version: version option; ?max_version: version option; - - (* later: equivalences: ... *) } (* Rule_ID.t, "^[a-zA-Z0-9._-]*$" *) @@ -284,7 +291,7 @@ type formula = { (* later: we should remove with a better range logic *) ?inside: formula option; - (* NEW: since 1.49 *) + (* NEW: since 1.49. alt: in condition instead as in 'where: - also: ...' *) ?anywhere: formula option; (* TODO? ?taint: taint *) @@ -498,11 +505,34 @@ type namespace = [ type semver_range = string (*****************************************************************************) -(* TODO: Extract mode *) +(* Extract *) (*****************************************************************************) -type extract = raw_json +type extract = { + metavariable: mvar; + ?dest_language : language option; + ?dest_rules : dest_rules option; + (* map-reduce! *) + ?reduce: extract_reduce option; + ?transform: extract_transform option; +} + +type dest_rules = { + (* CHECK: at least one of those options is set *) + ?exclude_ : rule_id list option; + ?include_ : rule_id list option; +} +type extract_reduce = [ + | Concat + | Separate +] + +type extract_transform = [ + | NoTransform + | UnquoteString + | ConcatJsonStringArray +] (*****************************************************************************) (* TODO: Secrets *) (*****************************************************************************)