Skip to content

Commit 4a167cc

Browse files
authored
Extend the definition of 'analyzer' in Semgrep rules (#286)
This PR extends the Semgrep rule schema in accordance with the work I did on the OCaml side. The PR is semgrep/semgrep-proprietary#1641 and I'm going to push my work to it very soon. This PR also adds JSON Schema validation tests. They're useful to check that the JSON Schema is valid, that it validates valid Semgrep rules, and rejects invalid Semgrep rules - without having to rely on another git repo to test this. test plan: `make test` This will run `check-jsonschema`. `make setup` will ask the user to install it if it's missing. - [x] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check) - [x] I made sure we're still backward compatible with old versions of the CLI. For example, the Semgrep backend need to still be able to *consume* data generated by Semgrep 1.17.0. See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
1 parent 9992e65 commit 4a167cc

14 files changed

+254
-6
lines changed

Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ clean:
7474
# This takes a while but ensures we use the correct versions of the atd tools.
7575
.PHONY: setup
7676
setup:
77+
# Please install check-jsonschema (Python tool) if this fails:
78+
check-jsonschema --version
7779
opam update
7880
opam install --deps-only .
7981

rule_schema_v1.yaml

+34-5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,38 @@ $defs:
3535
oneOf:
3636
- required: [ http ]
3737
- required: [ aws ]
38+
analyzer:
39+
title: Analyzer to use
40+
oneOf:
41+
- const: entropy
42+
- const: entropy_v2
43+
- const: redos
44+
- type: object
45+
properties:
46+
kind:
47+
const: entropy
48+
required:
49+
- kind
50+
additionalProperties: false
51+
- type: object
52+
properties:
53+
kind:
54+
const: entropy_v2
55+
mode:
56+
oneOf:
57+
- const: lax
58+
- const: strict
59+
- const: default
60+
required:
61+
- kind
62+
additionalProperties: false
63+
- type: object
64+
properties:
65+
kind:
66+
const: redos
67+
required:
68+
- kind
69+
additionalProperties: false
3870
# EXPERIMENTAL
3971
aws-request-content:
4072
properties:
@@ -227,7 +259,7 @@ $defs:
227259
metavariable:
228260
type: string
229261
analyzer:
230-
type: string
262+
$ref: "#/$defs/analyzer"
231263
general-pattern-content:
232264
title: "Return finding where code matches against the following pattern"
233265
oneOf:
@@ -482,13 +514,10 @@ $defs:
482514
title: Inspect a metavariable with a given analyzer
483515
properties:
484516
analyzer:
485-
type: string
486-
title: Analyzer to use
517+
$ref: "#/$defs/analyzer"
487518
metavariable:
488519
type: string
489520
title: Metavariable to analyze
490-
options:
491-
type: object
492521
required:
493522
- analyzer
494523
- metavariable

rule_schema_v2.atd

+8-1
Original file line numberDiff line numberDiff line change
@@ -432,9 +432,16 @@ type mvar = string
432432

433433
type analyzer = [
434434
| Entropy <json name="entropy">
435-
| EntropyV2 <json name="entropy_v2">
435+
| EntropyV2 <json name="entropy_v2"> of entropy_analysis_mode
436436
| Redos <json name="redos">
437437
]
438+
<json adapter.ocaml="Rule_schema_v2_adapter.Analyzer">
439+
440+
type entropy_analysis_mode = [
441+
| Lax <json name="lax">
442+
| Default <json name="default">
443+
| Strict <json name="strict">
444+
]
438445

439446
(* --------------------------- *)
440447
(* Focus condition *)

rule_schema_v2_adapter.ml

+94
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,97 @@ module ProjectDependsOn = struct
6969
let restore (_atd : Yojson.Safe.t) : Yojson.Safe.t =
7070
failwith "Rule_schema_v2_adapter.ProjectDependsOn.restore not implemented"
7171
end
72+
73+
(* This is the name of the field that contains the variant constructor
74+
in the user-friendly YAML convention we use to represent variants.
75+
See 'normalize_variant'. *)
76+
let kind_field_name = "kind"
77+
78+
(*
79+
A generic representation for variants. The parameters, if any, must be
80+
an ATD record (JSON object, Yojson assoc).
81+
82+
type t = [
83+
| A <json name="a">
84+
| B <json name="b"> of b
85+
]
86+
87+
type b = {
88+
(* all the fields are optional *)
89+
?k: int option;
90+
}
91+
92+
1. OCaml A is represented as JSON "A". The adapter doesn't change it.
93+
2. OCaml B {k = 42} is represented as JSON {"kind": "B", "k": 42}
94+
which the adapter converts to JSON ["B", {"k": 42}].
95+
96+
Additionally, the alternate notations {"kind": "A"} and "B" can be
97+
supported in addition to "A" and {"kind": "B"}. This requires specifying
98+
the constructors for which the alternate notation is supported.
99+
Constructors that don't expect an argument must be listed as 'enum'.
100+
Constructors that expect an object argument must be listed as 'obj'.
101+
This gives us the following call:
102+
103+
normalize_variant ~enum:["a"] ~obj:["b"] json
104+
105+
Without specifying 'enum' or 'obj', YAML/JSON interpretation will be
106+
stricter by not tolerating the alternate notations {"kind": "A"} or "B".
107+
108+
YAML example:
109+
110+
- a
111+
112+
- kind: b
113+
k: 42
114+
115+
# assuming default properties:
116+
- kind: b
117+
118+
# shorthand for {kind: b}:
119+
- b
120+
121+
# long form for "a":
122+
- kind: a
123+
124+
TODO: make the ATD tools (atdgen, atdpy, ...) support these alternate
125+
formats as well?
126+
This would allow us to make adapters generic i.e. without
127+
having to specify the 'enum' and 'obj' options. In the example above,
128+
atdgen would read "b" as ["b", {}] and would read ["a", {}] or ["a", null]
129+
as "a" without complaining.
130+
*)
131+
(* [normalize_variant ?enum ?obj orig] rewrites the user-friendly notation
   for a variant into a bare string or a two-element list.
   [enum] lists the constructor names that take no argument and [obj] the
   constructor names that take an object argument; both default to the
   empty list. Input that matches none of the cases below is returned
   unchanged. *)
let normalize_variant
132+
?(enum = [])
133+
?(obj = [])
134+
(orig : Yojson.Safe.t ) : Yojson.Safe.t =
135+
match orig with
136+
| `Assoc props ->
137+
(* Split the object's fields into the "kind" field(s) and all the others. *)
(match List.partition (fun (k, _v) -> k = kind_field_name) props with
138+
(* An object holding only a string-valued "kind" listed in 'enum':
   collapse it to the plain string. *)
| [_, `String kind], [] when List.mem kind enum -> `String kind
139+
(* Exactly one string-valued "kind": emit the pair made of the kind and
   an object holding the remaining fields (possibly none). *)
| [_, `String kind], other_fields ->
140+
`List [`String kind; `Assoc other_fields]
141+
(* No "kind" field, several of them, or a non-string value: leave as is. *)
| _missing_or_duplicate_kind, _ -> orig
142+
)
143+
(* A bare string listed in 'obj' is expanded to the pair made of that
   string and an empty object. *)
| `String kind when List.mem kind obj -> `List [`String kind; `Assoc []]
144+
| _string_or_malformed -> orig
145+
146+
(* Unlike 'normalize_variant', this is fully generic
147+
(because we're going from a strict format to a looser format).
Converts the two-element list form [K, {fields}] back to a single object
whose first field is "kind" with value K, followed by the original fields.
Bare strings and any other input are returned unchanged. *)
148+
let restore_variant
149+
(atd : Yojson.Safe.t ) : Yojson.Safe.t =
150+
match atd with
151+
| `String _ as str -> str
152+
| `List [`String _ as kind; `Assoc fields] ->
153+
(* Put the constructor name back as the "kind" field, ahead of the
   payload fields. *)
`Assoc ((kind_field_name, kind) :: fields)
154+
| _malformed -> atd
155+
156+
(* JSON adapter for the 'analyzer' variant.
   "entropy" and "redos" are declared as argument-less constructors and
   "entropy_v2" as a constructor taking an object argument.
   NOTE(review): with these settings 'normalize' gives entropy_v2 an object
   payload (its non-kind fields, or an empty object for the bare string),
   whereas the ATD hunk in this commit declares the argument of EntropyV2 as
   entropy_analysis_mode, which is a plain variant and not a record.
   TODO confirm that the two agree. *)
module Analyzer = struct
157+
let normalize orig =
158+
normalize_variant
159+
~enum:["entropy"; "redos"]
160+
~obj:["entropy_v2"]
161+
orig
162+
163+
(* The reverse direction needs no per-constructor configuration. *)
let restore (atd : Yojson.Safe.t) : Yojson.Safe.t =
164+
restore_variant atd
165+
end

tests/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
.PHONY: test
33
test:
44
./test-ast
5+
$(MAKE) -C jsonschema test
56

67
.PHONY: clean
78
clean:

tests/jsonschema/Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Validate sample YAML data against JSON Schemas
2+
.PHONY: test
3+
test:
4+
./validate ../../rule_schema_v1.yaml rules
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer: entropt
8+
message: Semgrep found a match
9+
languages: [python]
10+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer:
8+
kind: entropy
9+
# illegal option for 'entropy':
10+
mode: strict
11+
message: Semgrep found a match
12+
languages: [python]
13+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer: entropy
8+
message: Semgrep found a match
9+
languages: [python]
10+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer:
8+
kind: entropy_v2
9+
mode: badass
10+
message: Semgrep found a match
11+
languages: [python]
12+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer:
8+
kind: entropy_v2
9+
mode: strict
10+
message: Semgrep found a match
11+
languages: [python]
12+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer:
8+
kind: entropy_v2
9+
message: Semgrep found a match
10+
languages: [python]
11+
severity: WARNING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
rules:
2+
- id: test-entropy
3+
patterns:
4+
- pattern: "$STRING"
5+
- metavariable-analysis:
6+
metavariable: $STRING
7+
analyzer: entropy_v2
8+
message: Semgrep found a match
9+
languages: [python]
10+
severity: WARNING

tests/jsonschema/validate

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#! /usr/bin/env bash
2+
#
3+
# Use a JSON Schema validator to check input files that should pass or fail.
4+
#
5+
# A file with the .ok.yaml extension is expected to pass validation.
6+
# A file with the .fail.yaml extension is expected to fail.
7+
#
# Usage: validate SCHEMA_FILE INPUT_DIR
#
# Exits with status 1 if at least one .fail.yaml file passes validation.
# Because of 'set -e', the script aborts earlier if check-jsonschema is
# missing or if the .ok.yaml check fails.
8+
set -eu
9+
10+
schema_file=$1
11+
input_dir=$2
12+
13+
# Check that check-jsonschema is installed
14+
# Installation instructions:
15+
# https://github.com/python-jsonschema/check-jsonschema
16+
check-jsonschema --version
17+
18+
# Check well-formed files
# All .ok.yaml files are passed to a single check-jsonschema invocation.
19+
check-jsonschema --schemafile "$schema_file" "$input_dir"/*.ok.yaml
20+
# Accumulate unexpected passes so that every .fail.yaml file gets checked
# before the script exits.
21+
exit_code=0
22+
23+
# Check that malformed files are detected
# Each file is validated separately. A command used as an 'if' condition
# does not trigger 'set -e', so a failing validation does not abort the loop.
# NOTE(review): if no *.fail.yaml file exists, the unexpanded glob pattern is
# passed as a file name; check-jsonschema presumably fails on it and this is
# reported as XFAIL - confirm whether an empty set should be an error.
# NOTE(review): any nonzero exit (not only a validation error) counts as an
# expected failure here - verify that this is acceptable.
24+
for input_file in "$input_dir"/*.fail.yaml; do
25+
if check-jsonschema --schemafile "$schema_file" "$input_file"; then
26+
echo "*** $input_file: should have failed validation" >&2
27+
exit_code=1
28+
else
29+
echo "XFAIL (failed as expected)"
30+
fi
31+
done
32+
33+
exit "$exit_code"

0 commit comments

Comments
 (0)