14
14
* we still use a JSON tool (here ATD, but also jsonschema) to specify
15
15
* the rule schema because YAML is a superset of JSON and can be
16
16
* mechanically translated into JSON; there is no yamlschema
17
- * (see https://json-schema-everywhere.github.io/yaml). To add even more
18
- * confusion, a jsonschema can actually be specified using YAML (like in
19
- * rule_shema_v1.yml), and so one can use YAML syntax to specify the
20
- * JSON schema of files actually written in YAML (hmmm).
17
+ * (see https://json-schema-everywhere.github.io/yaml).
21
18
*
22
19
* Jsonschema is powerful but also arguably complicated and so it
23
20
* might be simpler for many Semgrep developers (and also some Semgrep
30
27
* Note that this file does not replace Parse_rule.ml nor Rule.ml. We still
31
28
* want to accept the old syntax in Parse_rule.ml and also parse with
32
29
* position information and error recovery which ATD does not provide.
30
+ * This file does not (yet) replace rule_schema_v1.yml either, which is
31
+ * more complete.
33
32
*
34
33
* TODO:
34
+ * - taint
35
+ * - extract
35
36
* - r2c-internal-project-depends-on-content
36
37
* - secrets
37
- * - generalized taint
38
38
* - steps (and join?)
39
+ * - generalized taint
40
+ * - new metavariable types
41
+ * - new 'anywhere:'
39
42
*)
40
43
41
44
(*****************************************************************************)
@@ -51,7 +54,6 @@ type glob = string
51
54
(* ex: "[a-zA-Z_]*\\.c" *)
52
55
type regex = string
53
56
54
-
55
57
(*****************************************************************************)
56
58
(* The rule *)
57
59
(*****************************************************************************)
@@ -65,14 +67,13 @@ type rule = {
65
67
(* TODO: selector vs analyzer *)
66
68
languages: language list;
67
69
68
- (* at least one of those must be set *)
70
+ (* CHECK: exactly one of those fields must be set *)
69
71
?match_ <json name="match">: formula option;
70
72
?taint: taint_spec option;
71
73
?extract: extract option;
72
74
(* TODO: join, steps, secrets, sca *)
73
75
74
- ~mode <ocaml default="`Search">: mode;
75
- (* TODO: product: product *)
76
+ (* TODO? product: product; *)
76
77
77
78
(* TODO? could be replaced by a pattern-filename: *)
78
79
?paths: paths option;
@@ -81,7 +82,7 @@ type rule = {
81
82
?fix_regex: fix_regex option;
82
83
83
84
?metadata: raw_json option;
84
- ?options: options option;
85
+ ?options: rule_options option;
85
86
86
87
?version: version option;
87
88
?min_version: version option;
@@ -96,15 +97,6 @@ type rule_id = string wrap <ocaml module="Rule_ID">
96
97
(* Version_info.t *)
97
98
type version = string (* TODO wrap <ocaml module="ATDStringWrap.Version"> *)
98
99
99
- type mode = [
100
- | Search <json name="search">
101
- | Taint <json name="taint">
102
- | Join <json name="join">
103
- | Extract <json name="extract">
104
- | SemgrepInternalPostprocessor <json name="semgrep_internal_postprocessor">
105
- (* TODO: Steps, SCA? *)
106
- ]
107
-
108
100
(*****************************************************************************)
109
101
(* Types of rule fields *)
110
102
(*****************************************************************************)
@@ -189,87 +181,49 @@ type fix_regex = {
189
181
?count: int option;
190
182
}
191
183
192
- type options <ocaml from="Rule_options" t="t"> = abstract
184
+ type rule_options <ocaml from="Rule_options" t="t"> = abstract
193
185
194
186
(*****************************************************************************)
195
- (* Search mode (default) and formula *)
187
+ (* Formula *)
196
188
(*****************************************************************************)
197
189
198
190
(* 'formula' below is handled by a <json adapter.ocaml=...> because there is no
199
191
* way to encode directly using ATD the way we chose to represent formulas
200
- * in YAML/JSON. Indeed, because Yaml/JSON does not support Algebraic data
201
- * types (ADTs), we used a weird encoding abusing objects to represent
202
- * formulas, e.g.,
203
- *
204
- * any:
205
- * - and:
206
- * - "foo"
207
- * - pattern: "bar"
208
- * where:
209
- * - bla
210
- *
211
- * which when turned into JSON gives:
212
- *
213
- * { any: [
214
- * { and: [
215
- * "foo",
216
- * {pattern: "bar" }
217
- * ],
218
- * where: [ bla ],
219
- * }
220
- * ]
221
- * }
222
- *
223
- * The ATD way would be to encode a formula as
192
+ * in YAML/JSON.
224
193
*
225
- * {f: ["Any", [
226
- * {f: ["And", [
227
- * {f: ["Pattern", "foo"]},
228
- * {f: ["Pattern", "bar"]},
229
- * ]],
230
- * where: [bla]
231
- * }
232
- * ]]
233
- * }
194
+ * alt: instead of using those ?all, ?regex, and CHECK:, we could use a
195
+ * proper variant, but that would require a more complex adapter and the
196
+ * distance between the spec and the actual syntax would be even longer.
234
197
*
235
- * So we need rule_schema_v2_adapter.ml used below to transform the first JSON
236
- * in the second dynamically at parsing time, so then ATD can parse it
237
- * using the formula type specified below.
238
- *)
239
-
198
+ * old: this type was called new-pattern in rule_schema_v1.yaml
199
+ *)
240
200
type formula = {
241
- (* alt: have ?all: ... ?any: ... ?regex: ... ?pattern: ... with a check
242
- * at parsing time that only one of those fields is given.
243
- *)
244
- f: formula_bis;
245
- (* alt: we could instead do 'All of formula list * condition list' below
246
- * but syntactically we also allow 'where' with pattern:, regex:, etc.
247
- * as in
248
- * { pattern: ..., where: ..., }
249
- * Even though internally in Rule.ml a { pattern: X, where: Y}
250
- * is transformed in an All [pattern: X, Y], in the syntax
251
- * we allow it in more places.
201
+ (* CHECK: exactly one of those fields must be set *)
202
+ (* either directly a string or pattern: string in the JSON *)
203
+ ?pattern: string option;
204
+ ?regex: regex option;
205
+ ?all: formula list option;
206
+ ?any: formula list option;
207
+ (* CHECK: not/inside/anywhere can appear only inside an all: *)
208
+ ?not: formula option;
209
+ ?inside: formula option;
210
+ ?anywhere: formula option;
211
+ (* TODO? ?taint: taint_spec *)
212
+
213
+ (* alt: we could instead do '?all: formula list option * condition list'
214
+ * above, but syntactically we also allow 'where' with pattern:, regex:,
215
+ * etc. as in:
216
+ *
217
+ * - pattern: "foo($X)"
218
+ * where: ...
219
+ *
220
+ * In fact that's the main reason we sometimes have to use pattern: string
221
+ * instead of a string because where: could not be attached to a string.
252
222
*)
253
223
~where: condition list;
254
224
}
255
225
<json adapter.ocaml="Rule_schema_v2_adapter.Formula">
256
226
257
- (* old: this type was called new-pattern in rule_schema_v1.yaml but formula in
258
- * Rule.ml
259
- *)
260
- type formula_bis = [
261
- (* either directly a string or pattern: string in the JSON *)
262
- | Pattern <json name="pattern"> of string
263
- | Regex <json name="regex"> of regex
264
- (* 'And of conjunction' in Rule.ml *)
265
- | All <json name="all"> of formula list
266
- | Any <json name="any"> of formula list
267
- (* Not and Inside can appear only inside an All *)
268
- | Not <json name="not"> of formula
269
- | Inside <json name="inside"> of formula
270
- (* TODO? Taint of taint_spec *)
271
- ]
272
-
273
227
(* Just like for formula, we're using an adapter to transform
274
228
* conditions in YAML like:
275
229
*
@@ -288,7 +242,7 @@ type formula_bis = [
288
242
* which we must transform into an ATD-compliant form:
289
243
*
290
244
* [ ["M", [{ metavariable: $X,
291
- * c: ["regex", $Z]
245
+ * regex: $Z
292
246
* }]
293
247
* ]]
294
248
*)
@@ -299,10 +253,10 @@ type condition = [
299
253
]
300
254
<json adapter.ocaml="Rule_schema_v2_adapter.Condition">
301
255
302
- (* either a single string or an array in JSON, that is
303
- * {focus: "$FOO"}, but also {focus: ["$FOO", "$BAR"]}
304
- *)
305
256
type focus = {
257
+ (* either a single string or an array in JSON, that is
258
+ * {focus: "$FOO"}, but also {focus: ["$FOO", "$BAR"]}
259
+ *)
306
260
focus: mvar list;
307
261
}
308
262
@@ -316,39 +270,43 @@ type comparison = {
316
270
317
271
type metavariable_cond = {
318
272
metavariable: mvar;
319
- (* alt: have ?type: ... ?types:... ?regex: ... *)
320
- c: metavariable_cond_bis;
321
- }
322
273
323
- type metavariable_cond_bis = [
324
- | Type <json name="type"> of string
325
- | Types <json name=" types"> of string list
326
- (* alt: we could remove Regex as Formula itself as a Regex
274
+ (* CHECK: exactly one of those fields must be set *)
275
+ ?type_ <json name="type">: string option;
276
+ ? types: string list option;
277
+ (* this covers regex:, pattern:, but also any formula.
327
278
* TODO: for metavariable-regex, can also enable constant_propagation
279
+ * TODO: we should also accept language: string
328
280
*)
329
- | Regex <json name="regex"> of regex
330
- (* TODO: accept also language: string *)
331
- | Formula <json name="F"> of formula
332
- | Analyzer <json name="analyzer"> of analyzer
333
- ]
281
+ inherit formula;
282
+ ?analyzer: analyzer option;
283
+ }
334
284
335
285
type analyzer = [
336
286
| Entropy <json name="entropy">
337
287
| Redos <json name="redos">
338
288
]
339
289
340
290
(*****************************************************************************)
341
- (* Taint mode *)
291
+ (* TODO: Tainting *)
342
292
(*****************************************************************************)
343
293
344
294
type taint_spec = raw_json
345
295
346
296
(*****************************************************************************)
347
- (* Extract mode *)
297
+ (* TODO: SSC *)
298
+ (*****************************************************************************)
299
+
300
+ (*****************************************************************************)
301
+ (* TODO: Extract mode *)
348
302
(*****************************************************************************)
349
303
350
304
type extract = raw_json
351
305
306
+ (*****************************************************************************)
307
+ (* TODO: Secrets *)
308
+ (*****************************************************************************)
309
+
352
310
(*****************************************************************************)
353
311
(* Toplevel *)
354
312
(*****************************************************************************)
@@ -357,7 +315,7 @@ type rules = {
357
315
rules: rule list;
358
316
359
317
(* Missed count of pro rules when not logged-in.
360
- * Sent by the registry to the CLI since 1.48 .
318
+ * Sent by the registry to the CLI since 1.49 .
361
319
* See https://github.com/semgrep/semgrep-app/pull/11142
362
320
*)
363
321
?missed: int option;
0 commit comments