27
27
* Note that this file does not replace Parse_rule.ml nor Rule.ml. We still
28
28
* want to accept the old syntax in Parse_rule.ml and also parse with
29
29
* position information and error recovery which ATD does not provide.
30
- * This files does not replace either (yet) rule_schema_v1.yml which is
31
- * more complete .
30
+ * This file does not replace either (yet) rule_schema_v1.yaml which covers
31
+ * also the old syntax.
32
32
*
33
33
* TODO:
34
- * - taint
35
34
* - extract
36
35
* - r2c-internal-project-depends-on-content
37
36
* - secrets
38
- * - steps (and join?)
39
- * - generalized taint
37
+ * - steps (but not join)
40
38
* - new metavariable types
41
- * - new 'anywhere:'
39
+ * - generalized taint?
40
+ *
41
+ * related documents:
42
+ * - rule_schema_v1.yaml (actually less complete for the new syntax now)
43
+ * - Parse_rule.ml (the final source of truth, except for stuff currently
44
+ * handled only in pysemgrep such as join-mode or ssc)
42
45
*)
43
46
44
47
(*****************************************************************************)
@@ -69,13 +72,12 @@ type rule = {
69
72
70
73
(* CHECK: exactly one of those fields must be set *)
71
74
?match_ <json name="match">: formula option;
72
- ?taint: taint_spec option;
75
+ ?taint: taint option;
73
76
?extract: extract option;
74
- (* TODO: join, steps, secrets, sca *)
75
-
77
+ (* TODO: steps, secrets, sca *)
76
78
(* TODO? product: product; *)
77
79
78
- (* TODO? could be replaced by a pattern-filename: *)
80
+ (* alt: later: could be replaced by a pattern-filename: *)
79
81
?paths: paths option;
80
82
81
83
?fix: string option;
@@ -95,20 +97,19 @@ type rule = {
95
97
type rule_id = string wrap <ocaml module="Rule_ID">
96
98
97
99
(* Version_info.t *)
98
- type version = string (* TODO wrap <ocaml module="ATDStringWrap.Version"> *)
100
+ type version = string
99
101
100
102
(*****************************************************************************)
101
103
(* Types of rule fields *)
102
104
(*****************************************************************************)
103
105
104
- (* coupling: semgrep_output_v1.atd with match_severity *)
106
+ (* coupling: semgrep_output_v1.atd with match_severity
107
+ * I've removed EXPERIMENT and INVENTORY which should not be used.
108
+ *)
105
109
type severity = [
106
110
| Error <json name="ERROR">
107
111
| Warning <json name="WARNING">
108
112
| Info <json name="INFO">
109
- (* should not be used *)
110
- | Experiment <json name="EXPERIMENT">
111
- | Inventory <json name="INVENTORY">
112
113
]
113
114
114
115
(* coupling: language.ml *)
@@ -171,8 +172,9 @@ type language = [
171
172
]
172
173
173
174
type paths = {
174
- ~include_ <json name="include">: glob list;
175
- ~exclude: glob list;
175
+ (* CHECK: at least one of these fields must be set *)
176
+ ?include_ <json name="include">: glob list option;
177
+ ?exclude_ <json name="exclude">: glob list option;
176
178
}
177
179
178
180
type fix_regex = {
@@ -195,35 +197,46 @@ type rule_options <ocaml from="Rule_options" t="t"> = abstract
195
197
* proper variant, but that would require a more complex adapter and the
196
198
* distance between the spec and the actual syntax would be even longer.
197
199
*
200
+ * alt: we could instead do '?all: formula list option * condition list'
201
+ * below, but syntactically we also allow 'where' with pattern:, regex:,
202
+ * etc. as in:
203
+ * - pattern: "foo($X)"
204
+ * where: ...
205
+ * In fact that's the main reason we sometimes have to use pattern: string
206
+ * instead of a string because where: could not be attached to a string.
207
+ *
198
208
* old: this type was called new-pattern in rule_schema_v1.yaml
199
- *)
209
+ *
210
+ * CHECK: exactly one of the pattern/regex/all/any/not/inside/anywhere fields
211
+ * must be set
212
+ * CHECK: not/inside/anywhere can appear only inside an all:
213
+ *)
200
214
type formula = {
201
- (* CHECK: exactly one of those fields must be set *)
202
215
(* either directly a string or pattern: string in the JSON *)
203
- ?pattern: string option;
216
+ ?pattern: pattern option;
217
+ (* regex can also be entered with pattern: xxx when languages: [regex] *)
204
218
?regex: regex option;
219
+
220
+ (* Boolean operators. alt: we could have chosen and: and or: *)
205
221
?all: formula list option;
206
222
?any: formula list option;
207
- (* CHECK: not/inside/anywhere can appear only inside an all: *)
208
223
?not: formula option;
224
+
225
+ (* later: we should remove this with a better range logic *)
209
226
?inside: formula option;
227
+ (* NEW: since 1.49 *)
210
228
?anywhere: formula option;
211
- (* TODO? ?taint: taint_spec *)
212
-
213
- (* alt: we could instead do '?all: formula list option * condition list'
214
- * above, but syntactically we also allow 'where' with pattern:, regex:,
215
- * etc. as in:
216
- *
217
- * - pattern: "foo($X)"
218
- * where: ...
219
- *
220
- * In fact that's the main reason we sometimes have to use pattern: string
221
- * instead of a string because where: could not be attached to a string.
222
- *)
223
- ~where: condition list;
229
+ (* TODO? ?taint: taint *)
230
+
231
+ ?where: condition list option;
224
232
}
225
233
<json adapter.ocaml="Rule_schema_v2_adapter.Formula">
226
234
235
+ (* This string must be a valid Semgrep pattern for the first language
236
+ * specified in the languages: list in the rule.
237
+ *)
238
+ type pattern = string
239
+
227
240
(* Just like for formula, we're using an adapter to transform
228
241
* conditions in YAML like:
229
242
*
@@ -262,10 +275,15 @@ type focus = {
262
275
263
276
type mvar = string
264
277
278
+ (* comparison expression with metavariables (currently using a Python-like
279
+ * syntax), ex: $X > 100
280
+ *)
281
+ type comparison_expr = string
282
+
265
283
type comparison = {
266
- comparison: string; (* expr *)
284
+ comparison: comparison_expr;
267
285
?base: int option;
268
- ~ strip: bool;
286
+ ? strip: bool option ;
269
287
}
270
288
271
289
type metavariable_cond = {
@@ -288,10 +306,98 @@ type analyzer = [
288
306
]
289
307
290
308
(*****************************************************************************)
291
- (* TODO: Tainting *)
309
+ (* Tainting *)
292
310
(*****************************************************************************)
293
311
294
- type taint_spec = raw_json
312
+ (* STRICTER: actually rule_schema_v1.yaml has very loose definitions for
313
+ * tainting stuff. Even requires: and label: are not defined for the
314
+ * old syntax, and for the new syntax many fields are still missing
315
+ * in rule_schema_v1.yaml
316
+ *)
317
+ type taint = {
318
+ sources: source list;
319
+ sinks: sink list;
320
+ ?sanitizers: sanitizer list option;
321
+ ?propagators: propagator list option;
322
+ }
323
+
324
+ (* --------------------------- *)
325
+ (* Some taint options *)
326
+ (* --------------------------- *)
327
+
328
+ type label_options = {
329
+ ?label: label option;
330
+ ?requires: requires_expr option;
331
+ }
332
+
333
+ type label = string
334
+
335
+ (* a boolean expression with labels, ex: "A and B" *)
336
+ type requires_expr = string
337
+
338
+ (* STRICTER: not even specified in rule_schema_v1.yaml *)
339
+ type taint_options = {
340
+ ?by_side_effect <json name="by-side-effect">: by_side_effect option;
341
+ ?exact: bool option;
342
+ }
343
+
344
+ (* we need an adapter here because we allow either a boolean or the string "only" *)
345
+ type by_side_effect = [
346
+ | True <json name="true">
347
+ | False <json name="false">
348
+ | Only <json name="only">
349
+ ]
350
+ <json adapter.ocaml="Rule_schema_v2_adapter.BySideEffect">
351
+
352
+
353
+ (* --------------------------- *)
354
+ (* Source *)
355
+ (* --------------------------- *)
356
+
357
+ (* need to repeat the adapter below for the str -> pattern: str adaptation *)
358
+ type source = {
359
+ inherit formula;
360
+ inherit label_options;
361
+ inherit taint_options;
362
+ ?control: bool option;
363
+ }
364
+ <json adapter.ocaml="Rule_schema_v2_adapter.Formula">
365
+
366
+ (* --------------------------- *)
367
+ (* Sink *)
368
+ (* --------------------------- *)
369
+ type sink = {
370
+ inherit formula;
371
+ (* just requires: here, no label: *)
372
+ ?requires: string option; (* expr with labels? *)
373
+ }
374
+ <json adapter.ocaml="Rule_schema_v2_adapter.Formula">
375
+
376
+ (* --------------------------- *)
377
+ (* Sanitizer *)
378
+ (* --------------------------- *)
379
+
380
+ type sanitizer = {
381
+ inherit formula;
382
+ inherit taint_options;
383
+ (* TODO: not-conflicting: *)
384
+ }
385
+ <json adapter.ocaml="Rule_schema_v2_adapter.Formula">
386
+
387
+ (* --------------------------- *)
388
+ (* Propagator *)
389
+ (* --------------------------- *)
390
+
391
+ type propagator = {
392
+ inherit formula;
393
+ from_ <json name="from">: mvar;
394
+ to_ <json name="to">: mvar;
395
+ inherit label_options;
396
+ (* no exact: here, just by-side-effect: *)
397
+ ?by_side_effect <json name="by-side-effect">: by_side_effect option;
398
+ (* TODO? replace-labels? *)
399
+ }
400
+ <json adapter.ocaml="Rule_schema_v2_adapter.Formula">
295
401
296
402
(*****************************************************************************)
297
403
(* TODO: SSC *)
0 commit comments