Skip to content

Commit 9f1c503

Browse files
authored
feat: more granular pro engine config metrics (#202)
Creates a new metrics type which is closely aligned with osemgrep's Engine_type.t, which allows for more precise tracking of the engine feature matrix. This is needed, rather than a simple enumeration, since we now have more orthogonal features than previously. For instance, we can now perform analysis on at least two axes (within pro): (1) secrets validation [enabled <-> disabled]; (2) dataflow [none <-> ... <-> interfile]. Creating a new enum entry for each possible permutation is not future-proof (and is ugly), so we need a more complex type here.
1 parent eb5a93e commit 9f1c503

File tree

2 files changed

+305
-0
lines changed

2 files changed

+305
-0
lines changed

semgrep_metrics.atd

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,35 @@ type pro_features = {
156156
?diffDepth <ocaml mutable>: int option;
157157
}
158158

(* Kind of analysis the engine was configured to perform; exactly one of
 * the three cases below is reported in engine_config.analysis_type.
 *)
type analysis_type <ocaml attr="deriving show"> =
  [ Intraprocedural | Interprocedural | Interfile ]
164+
(* Configuration of the code product. It carries no settings at present,
 * hence unit; in engine_config its presence alone is the signal.
 *)
type code_config
  <ocaml attr="deriving show"> = unit
166+
(* Possible values of secrets_config.permitted_origins. *)
type secrets_origin <ocaml attr="deriving show"> = [
  | Any
  | Semgrep
]
(* Configuration of the secrets product. *)
type secrets_config <ocaml attr="deriving show"> = {
  (* Required field: which secrets_origin value is permitted. *)
  permitted_origins: secrets_origin;
}
172+
(* Configuration of the supply chain product. It carries no settings at
 * present, hence unit; in engine_config its presence alone is the signal.
 *)
type supply_chain_config
  <ocaml attr="deriving show"> = unit
174+
(* Since v1.54.0 *)
(* Engine feature matrix: one analysis_type, a pro-languages flag, and one
 * optional configuration per product.
 *)
type engine_config <ocaml attr="deriving show"> = {
  analysis_type: analysis_type;
  pro_langs: bool;

  (* `Some c` where `c` is the config if the product was run.
   * `None` if it was not run.
   *)
  ?code_config: code_config option;
  ?secrets_config: secrets_config option;
  ?supply_chain_config: supply_chain_config option;
}
187+
159188
type misc = {
160189
(* coupling: features is commented a lot in semgrep/PRIVACY.md *)
161190
features <ocaml mutable>: string list;
@@ -166,6 +195,8 @@ type misc = {
166195
?ruleHashesWithFindings <ocaml mutable>: (string * int) list <json repr="object"> option;
167196
(* TODO: should be OSS | Pro, see semgrep_output_v1.atd engine_kind type *)
168197
~engineRequested <python default="'OSS'"> <ocaml mutable>: string;
198+
(* Since Semgrep 1.54.0 *)
199+
?engineConfig <ocaml mutable>: engine_config option;
169200
(* Since Semgrep 1.49.0 *)
170201
?interfileLanguagesUsed: string list option;
171202
}

semgrep_metrics.py

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,30 @@ def to_json_string(self, **kw: Any) -> str:
284284
return json.dumps(self.to_json(), **kw)
285285

286286

@dataclass
class SupplyChainConfig:
    """Original type: supply_chain_config (declared as ``unit`` in the ATD).

    Field-less marker stored in ``EngineConfig.supply_chain_config``.

    The ATD schema declares this type as ``unit``, whose JSON form is
    ``null``, while this class writes an empty JSON object. ``from_json``
    therefore accepts both shapes so that a payload produced from the
    schema as written still decodes.
    """

    @classmethod
    def from_json(cls, x: Any) -> 'SupplyChainConfig':
        """Build an instance from decoded JSON.

        ``x`` may be a dict (the shape ``to_json`` emits) or ``None``
        (JSON ``null``, the ATD encoding of ``unit``). Any other value is
        handed to ``_atd_bad_json``.
        """
        if x is None or isinstance(x, dict):
            return cls(
            )
        else:
            _atd_bad_json('SupplyChainConfig', x)

    def to_json(self) -> Any:
        """Return the JSON-ready form: an empty dict.

        NOTE(review): kept as ``{}`` so existing consumers see no change;
        confirm whether readers generated from the ATD expect ``null``.
        """
        res: Dict[str, Any] = {}
        return res

    @classmethod
    def from_json_string(cls, x: str) -> 'SupplyChainConfig':
        """Parse JSON text and build an instance via ``from_json``."""
        return cls.from_json(json.loads(x))

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        return json.dumps(self.to_json(), **kw)
309+
310+
287311
@dataclass
288312
class Sha256:
289313
"""Original type: sha256"""
@@ -305,6 +329,100 @@ def to_json_string(self, **kw: Any) -> str:
305329
return json.dumps(self.to_json(), **kw)
306330

307331

@dataclass
class Any_:
    """Original type: secrets_origin = [ ... | Any | ... ]

    Payload-free case of the ``secrets_origin`` variant.
    """

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return 'Any_'

    @staticmethod
    def to_json() -> Any:
        """Return the JSON form of this case: the string ``'Any'``."""
        return 'Any'

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
347+
348+
@dataclass
class Semgrep:
    """Original type: secrets_origin = [ ... | Semgrep | ... ]

    Payload-free case of the ``secrets_origin`` variant.
    """

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return 'Semgrep'

    @staticmethod
    def to_json() -> Any:
        """Return the JSON form of this case: the string ``'Semgrep'``."""
        return 'Semgrep'

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
364+
365+
@dataclass
class SecretsOrigin:
    """Original type: secrets_origin = [ ... ]

    Wrapper holding exactly one case of the variant in ``value``.
    """

    value: Union[Any_, Semgrep]

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return self.value.kind

    @classmethod
    def from_json(cls, x: Any) -> 'SecretsOrigin':
        """Decode the JSON string ``'Any'`` or ``'Semgrep'``.

        Unknown strings and non-string values are handed to
        ``_atd_bad_json``.
        """
        if isinstance(x, str):
            # Map each JSON tag to the class of the case it denotes.
            case_by_tag = {'Any': Any_, 'Semgrep': Semgrep}
            case = case_by_tag.get(x)
            if case is not None:
                return cls(case())
            _atd_bad_json('SecretsOrigin', x)
        _atd_bad_json('SecretsOrigin', x)

    def to_json(self) -> Any:
        """Return the JSON form of the wrapped case."""
        return self.value.to_json()

    @classmethod
    def from_json_string(cls, x: str) -> 'SecretsOrigin':
        """Parse JSON text and decode it via ``from_json``."""
        decoded = json.loads(x)
        return cls.from_json(decoded)

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
396+
397+
@dataclass
class SecretsConfig:
    """Original type: secrets_config = { ... }

    Record with a single required field, ``permitted_origins``.
    """

    permitted_origins: SecretsOrigin

    @classmethod
    def from_json(cls, x: Any) -> 'SecretsConfig':
        """Build an instance from a decoded JSON object.

        A missing ``permitted_origins`` key is reported through
        ``_atd_missing_json_field``; a non-dict ``x`` through
        ``_atd_bad_json``.
        """
        if isinstance(x, dict):
            if 'permitted_origins' in x:
                origins = SecretsOrigin.from_json(x['permitted_origins'])
            else:
                origins = _atd_missing_json_field('SecretsConfig', 'permitted_origins')
            return cls(permitted_origins=origins)
        else:
            _atd_bad_json('SecretsConfig', x)

    def to_json(self) -> Any:
        """Return the JSON-ready dict for this record."""
        out: Dict[str, Any] = {
            'permitted_origins': self.permitted_origins.to_json(),
        }
        return out

    @classmethod
    def from_json_string(cls, x: str) -> 'SecretsConfig':
        """Parse JSON text and build an instance via ``from_json``."""
        decoded = json.loads(x)
        return cls.from_json(decoded)

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
424+
425+
308426
@dataclass
309427
class RuleStats:
310428
"""Original type: rule_stats = { ... }"""
@@ -502,6 +620,158 @@ def to_json_string(self, **kw: Any) -> str:
502620
return json.dumps(self.to_json(), **kw)
503621

504622

@dataclass
class CodeConfig:
    """Original type: code_config (declared as ``unit`` in the ATD).

    Field-less marker stored in ``EngineConfig.code_config``.

    The ATD schema declares this type as ``unit``, whose JSON form is
    ``null``, while this class writes an empty JSON object. ``from_json``
    therefore accepts both shapes so that a payload produced from the
    schema as written still decodes.
    """

    @classmethod
    def from_json(cls, x: Any) -> 'CodeConfig':
        """Build an instance from decoded JSON.

        ``x`` may be a dict (the shape ``to_json`` emits) or ``None``
        (JSON ``null``, the ATD encoding of ``unit``). Any other value is
        handed to ``_atd_bad_json``.
        """
        if x is None or isinstance(x, dict):
            return cls(
            )
        else:
            _atd_bad_json('CodeConfig', x)

    def to_json(self) -> Any:
        """Return the JSON-ready form: an empty dict.

        NOTE(review): kept as ``{}`` so existing consumers see no change;
        confirm whether readers generated from the ATD expect ``null``.
        """
        res: Dict[str, Any] = {}
        return res

    @classmethod
    def from_json_string(cls, x: str) -> 'CodeConfig':
        """Parse JSON text and build an instance via ``from_json``."""
        return cls.from_json(json.loads(x))

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        return json.dumps(self.to_json(), **kw)
645+
646+
@dataclass
class Intraprocedural:
    """Original type: analysis_type = [ ... | Intraprocedural | ... ]

    Payload-free case of the ``analysis_type`` variant.
    """

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return 'Intraprocedural'

    @staticmethod
    def to_json() -> Any:
        """Return the JSON form of this case: ``'Intraprocedural'``."""
        return 'Intraprocedural'

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
662+
663+
@dataclass
class Interprocedural:
    """Original type: analysis_type = [ ... | Interprocedural | ... ]

    Payload-free case of the ``analysis_type`` variant.
    """

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return 'Interprocedural'

    @staticmethod
    def to_json() -> Any:
        """Return the JSON form of this case: ``'Interprocedural'``."""
        return 'Interprocedural'

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
679+
680+
@dataclass
class Interfile:
    """Original type: analysis_type = [ ... | Interfile | ... ]

    Payload-free case of the ``analysis_type`` variant.
    """

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return 'Interfile'

    @staticmethod
    def to_json() -> Any:
        """Return the JSON form of this case: ``'Interfile'``."""
        return 'Interfile'

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
696+
697+
@dataclass
class AnalysisType:
    """Original type: analysis_type = [ ... ]

    Wrapper holding exactly one case of the variant in ``value``.
    """

    value: Union[Intraprocedural, Interprocedural, Interfile]

    @property
    def kind(self) -> str:
        """Name of the class representing this variant."""
        return self.value.kind

    @classmethod
    def from_json(cls, x: Any) -> 'AnalysisType':
        """Decode one of the JSON strings naming an analysis type.

        Recognised strings are ``'Intraprocedural'``,
        ``'Interprocedural'`` and ``'Interfile'``. Unknown strings and
        non-string values are handed to ``_atd_bad_json``.
        """
        if isinstance(x, str):
            # Map each JSON tag to the class of the case it denotes.
            case_by_tag = {
                'Intraprocedural': Intraprocedural,
                'Interprocedural': Interprocedural,
                'Interfile': Interfile,
            }
            case = case_by_tag.get(x)
            if case is not None:
                return cls(case())
            _atd_bad_json('AnalysisType', x)
        _atd_bad_json('AnalysisType', x)

    def to_json(self) -> Any:
        """Return the JSON form of the wrapped case."""
        return self.value.to_json()

    @classmethod
    def from_json_string(cls, x: str) -> 'AnalysisType':
        """Parse JSON text and decode it via ``from_json``."""
        decoded = json.loads(x)
        return cls.from_json(decoded)

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
730+
731+
@dataclass
class EngineConfig:
    """Original type: engine_config = { ... }

    ``analysis_type`` and ``pro_langs`` are required. The three
    ``*_config`` fields are optional and default to ``None``.
    """

    analysis_type: AnalysisType
    pro_langs: bool
    code_config: Optional[CodeConfig] = None
    secrets_config: Optional[SecretsConfig] = None
    supply_chain_config: Optional[SupplyChainConfig] = None

    @classmethod
    def from_json(cls, x: Any) -> 'EngineConfig':
        """Build an instance from a decoded JSON object.

        Missing required keys are reported through
        ``_atd_missing_json_field``; missing optional keys become
        ``None``. A non-dict ``x`` is handed to ``_atd_bad_json``.
        """
        if isinstance(x, dict):
            # Required fields, decoded in declaration order.
            if 'analysis_type' in x:
                analysis = AnalysisType.from_json(x['analysis_type'])
            else:
                analysis = _atd_missing_json_field('EngineConfig', 'analysis_type')
            if 'pro_langs' in x:
                langs = _atd_read_bool(x['pro_langs'])
            else:
                langs = _atd_missing_json_field('EngineConfig', 'pro_langs')

            # Optional fields: an absent key maps to None.
            code = None
            if 'code_config' in x:
                code = CodeConfig.from_json(x['code_config'])
            secrets = None
            if 'secrets_config' in x:
                secrets = SecretsConfig.from_json(x['secrets_config'])
            supply_chain = None
            if 'supply_chain_config' in x:
                supply_chain = SupplyChainConfig.from_json(x['supply_chain_config'])

            return cls(
                analysis_type=analysis,
                pro_langs=langs,
                code_config=code,
                secrets_config=secrets,
                supply_chain_config=supply_chain,
            )
        else:
            _atd_bad_json('EngineConfig', x)

    def to_json(self) -> Any:
        """Return the JSON-ready dict; ``None`` optional fields are omitted."""
        out: Dict[str, Any] = {
            'analysis_type': self.analysis_type.to_json(),
            'pro_langs': _atd_write_bool(self.pro_langs),
        }
        # Emit optional members in declaration order, skipping unset ones.
        for key in ('code_config', 'secrets_config', 'supply_chain_config'):
            member = getattr(self, key)
            if member is not None:
                out[key] = member.to_json()
        return out

    @classmethod
    def from_json_string(cls, x: str) -> 'EngineConfig':
        """Parse JSON text and build an instance via ``from_json``."""
        decoded = json.loads(x)
        return cls.from_json(decoded)

    def to_json_string(self, **kw: Any) -> str:
        """Serialize to JSON text; ``kw`` is forwarded to ``json.dumps``."""
        encoded = self.to_json()
        return json.dumps(encoded, **kw)
773+
774+
505775
@dataclass
506776
class Misc:
507777
"""Original type: misc = { ... }"""
@@ -512,6 +782,7 @@ class Misc:
512782
numIgnored: Optional[int] = None
513783
ruleHashesWithFindings: Optional[List[Tuple[str, int]]] = None
514784
engineRequested: str = field(default_factory=lambda: 'OSS')
785+
engineConfig: Optional[EngineConfig] = None
515786
interfileLanguagesUsed: Optional[List[str]] = None
516787

517788
@classmethod
@@ -524,6 +795,7 @@ def from_json(cls, x: Any) -> 'Misc':
524795
numIgnored=_atd_read_int(x['numIgnored']) if 'numIgnored' in x else None,
525796
ruleHashesWithFindings=_atd_read_assoc_object_into_list(_atd_read_int)(x['ruleHashesWithFindings']) if 'ruleHashesWithFindings' in x else None,
526797
engineRequested=_atd_read_string(x['engineRequested']) if 'engineRequested' in x else 'OSS',
798+
engineConfig=EngineConfig.from_json(x['engineConfig']) if 'engineConfig' in x else None,
527799
interfileLanguagesUsed=_atd_read_list(_atd_read_string)(x['interfileLanguagesUsed']) if 'interfileLanguagesUsed' in x else None,
528800
)
529801
else:
@@ -541,6 +813,8 @@ def to_json(self) -> Any:
541813
if self.ruleHashesWithFindings is not None:
542814
res['ruleHashesWithFindings'] = _atd_write_assoc_list_to_object(_atd_write_int)(self.ruleHashesWithFindings)
543815
res['engineRequested'] = _atd_write_string(self.engineRequested)
816+
if self.engineConfig is not None:
817+
res['engineConfig'] = (lambda x: x.to_json())(self.engineConfig)
544818
if self.interfileLanguagesUsed is not None:
545819
res['interfileLanguagesUsed'] = _atd_write_list(_atd_write_string)(self.interfileLanguagesUsed)
546820
return res

0 commit comments

Comments
 (0)