Skip to content

Commit deffcb8

Browse files
authored
Merge pull request #60 from returntocorp/clara/add-new-hashes
feat: add 4 new hashes to finding type for alternatives to syntactic_id and match_based_id that do not change when a file or rule gets renamed
2 parents 8cb76d3 + 3a5236b commit deffcb8

8 files changed

+2048
-1552
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,10 @@ rely on this.
99

1010
This repository is meant to be used as a submodule.
1111

12+
You may need to install `opam` as a prerequisite for contributing to this repository.
13+
14+
To initialize your environment, run `make setup`, which should install the development dependencies specified in `dev.opam`.
15+
16+
To update an interface:
17+
1. Make changes to the appropriate .atd file
18+
2. Run `make`. This will propagate your change to the corresponding generated files (.py, .ts, .ml, etc.).

dev.opam

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This is meant to be installed with 'opam install --deps-only .'
1+
# This is meant to be installed with 'opam install --deps-only . '
22
#
33
opam-version: "2.0"
44
maintainer: "r2c"
@@ -10,4 +10,5 @@ synopsis: "OCaml development dependencies"
1010
depends: [
1111
"atdpy" {>= "2.10.0"}
1212
"atdts" {>= "2.10.0"}
13+
"atdgen" {>= "2.10.0"}
1314
]

semgrep_output_v1.atd

+11
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,15 @@ type api_scans_findings <ocaml attr="deriving show"> = {
787787
cai_ids: string list;
788788
}
789789

790+
type finding_hashes <ocaml attr="deriving show"> = {
791+
start_line_hash: string;
792+
end_line_hash: string;
793+
(* hash of the syntactic_context/code contents from start_line through end_line *)
794+
code_hash: string;
795+
(* hash of the rule pattern with metavariables substituted in *)
796+
pattern_hash: string;
797+
}
798+
790799
(* TODO: rewrite rule_matches.to_app_finding_format() *)
791800
type finding <ocaml attr="deriving show"> = {
792801
check_id: rule_id;
@@ -804,6 +813,8 @@ type finding <ocaml attr="deriving show"> = {
804813
syntactic_id: string;
805814
(* since semgrep 0.98 *)
806815
?match_based_id: string option;
816+
(* since semgrep 1.14.0 *)
817+
?hashes: finding_hashes option;
807818
metadata: raw_json;
808819
is_blocking: bool;
809820
?fixed_lines: string list option;

semgrep_output_v1.jsonschema

+15-4
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,7 @@
8787
"minItems": 2,
8888
"items": false,
8989
"prefixItems": [
90-
{ "const": "CoreLoc" },
91-
{ "$ref": "#/definitions/location" }
90+
{ "const": "CoreLoc" }, { "$ref": "#/definitions/location" }
9291
]
9392
},
9493
{
@@ -433,8 +432,7 @@
433432
"minItems": 2,
434433
"items": false,
435434
"prefixItems": [
436-
{ "$ref": "#/definitions/location" },
437-
{ "type": "string" }
435+
{ "$ref": "#/definitions/location" }, { "type": "string" }
438436
]
439437
}
440438
]
@@ -709,6 +707,18 @@
709707
"cai_ids": { "type": "array", "items": { "type": "string" } }
710708
}
711709
},
710+
"finding_hashes": {
711+
"type": "object",
712+
"required": [
713+
"start_line_hash", "end_line_hash", "code_hash", "pattern_hash"
714+
],
715+
"properties": {
716+
"start_line_hash": { "type": "string" },
717+
"end_line_hash": { "type": "string" },
718+
"code_hash": { "type": "string" },
719+
"pattern_hash": { "type": "string" }
720+
}
721+
},
712722
"finding": {
713723
"type": "object",
714724
"required": [
@@ -729,6 +739,7 @@
729739
"commit_date": { "type": "string" },
730740
"syntactic_id": { "type": "string" },
731741
"match_based_id": { "type": "string" },
742+
"hashes": { "$ref": "#/definitions/finding_hashes" },
732743
"metadata": { "$ref": "#/definitions/raw_json" },
733744
"is_blocking": { "type": "boolean" },
734745
"fixed_lines": { "type": "array", "items": { "type": "string" } },

semgrep_output_v1.py

+41
Original file line numberDiff line numberDiff line change
@@ -1934,6 +1934,43 @@ def to_json_string(self, **kw: Any) -> str:
19341934
return json.dumps(self.to_json(), **kw)
19351935

19361936

1937+
@dataclass
1938+
class FindingHashes:
1939+
"""Original type: finding_hashes = { ... }"""
1940+
1941+
start_line_hash: str
1942+
end_line_hash: str
1943+
code_hash: str
1944+
pattern_hash: str
1945+
1946+
@classmethod
1947+
def from_json(cls, x: Any) -> 'FindingHashes':
1948+
if isinstance(x, dict):
1949+
return cls(
1950+
start_line_hash=_atd_read_string(x['start_line_hash']) if 'start_line_hash' in x else _atd_missing_json_field('FindingHashes', 'start_line_hash'),
1951+
end_line_hash=_atd_read_string(x['end_line_hash']) if 'end_line_hash' in x else _atd_missing_json_field('FindingHashes', 'end_line_hash'),
1952+
code_hash=_atd_read_string(x['code_hash']) if 'code_hash' in x else _atd_missing_json_field('FindingHashes', 'code_hash'),
1953+
pattern_hash=_atd_read_string(x['pattern_hash']) if 'pattern_hash' in x else _atd_missing_json_field('FindingHashes', 'pattern_hash'),
1954+
)
1955+
else:
1956+
_atd_bad_json('FindingHashes', x)
1957+
1958+
def to_json(self) -> Any:
1959+
res: Dict[str, Any] = {}
1960+
res['start_line_hash'] = _atd_write_string(self.start_line_hash)
1961+
res['end_line_hash'] = _atd_write_string(self.end_line_hash)
1962+
res['code_hash'] = _atd_write_string(self.code_hash)
1963+
res['pattern_hash'] = _atd_write_string(self.pattern_hash)
1964+
return res
1965+
1966+
@classmethod
1967+
def from_json_string(cls, x: str) -> 'FindingHashes':
1968+
return cls.from_json(json.loads(x))
1969+
1970+
def to_json_string(self, **kw: Any) -> str:
1971+
return json.dumps(self.to_json(), **kw)
1972+
1973+
19371974
@dataclass(frozen=True)
19381975
class CliMatchDataflowTrace:
19391976
"""Original type: cli_match_dataflow_trace = { ... }"""
@@ -1989,6 +2026,7 @@ class Finding:
19892026
metadata: RawJson
19902027
is_blocking: bool
19912028
match_based_id: Optional[str] = None
2029+
hashes: Optional[FindingHashes] = None
19922030
fixed_lines: Optional[List[str]] = None
19932031
sca_info: Optional[ScaInfo] = None
19942032
dataflow_trace: Optional[CliMatchDataflowTrace] = None
@@ -2011,6 +2049,7 @@ def from_json(cls, x: Any) -> 'Finding':
20112049
metadata=RawJson.from_json(x['metadata']) if 'metadata' in x else _atd_missing_json_field('Finding', 'metadata'),
20122050
is_blocking=_atd_read_bool(x['is_blocking']) if 'is_blocking' in x else _atd_missing_json_field('Finding', 'is_blocking'),
20132051
match_based_id=_atd_read_string(x['match_based_id']) if 'match_based_id' in x else None,
2052+
hashes=FindingHashes.from_json(x['hashes']) if 'hashes' in x else None,
20142053
fixed_lines=_atd_read_list(_atd_read_string)(x['fixed_lines']) if 'fixed_lines' in x else None,
20152054
sca_info=ScaInfo.from_json(x['sca_info']) if 'sca_info' in x else None,
20162055
dataflow_trace=CliMatchDataflowTrace.from_json(x['dataflow_trace']) if 'dataflow_trace' in x else None,
@@ -2035,6 +2074,8 @@ def to_json(self) -> Any:
20352074
res['is_blocking'] = _atd_write_bool(self.is_blocking)
20362075
if self.match_based_id is not None:
20372076
res['match_based_id'] = _atd_write_string(self.match_based_id)
2077+
if self.hashes is not None:
2078+
res['hashes'] = (lambda x: x.to_json())(self.hashes)
20382079
if self.fixed_lines is not None:
20392080
res['fixed_lines'] = _atd_write_list(_atd_write_string)(self.fixed_lines)
20402081
if self.sca_info is not None:

0 commit comments

Comments
 (0)