From a12009e66159f614b65e38075323dcdd62728b9c Mon Sep 17 00:00:00 2001 From: Cooper Pierce Date: Fri, 3 Nov 2023 12:56:09 -0700 Subject: [PATCH 1/3] Add additional internal pattern combinator Allows `semgrep-internal-patterns-allow-disjoint` to appear everywhere (other than taint) where a `patterns` may appear. See notion for semantics. --- rule_schema_v1.yaml | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/rule_schema_v1.yaml b/rule_schema_v1.yaml index ff892211..82575ac0 100644 --- a/rule_schema_v1.yaml +++ b/rule_schema_v1.yaml @@ -217,6 +217,7 @@ $defs: - required: [ pattern ] - required: [ pattern-regex ] - required: [ patterns ] + - required: [ semgrep-internal-patterns-allow-disjoint ] - required: [ pattern-either ] - required: [ pattern-not ] - required: [ pattern-inside ] @@ -228,6 +229,8 @@ $defs: type: string patterns: $ref: "#/$defs/patterns-content" + semgrep-internal-patterns-allow-disjoint: + $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" pattern-not: @@ -244,6 +247,7 @@ $defs: items: anyOf: - $ref: "#/$defs/patterns" + - $ref: "#/$defs/semgrep-internal-patterns-allow-disjoint" - $ref: "#/$defs/pattern-either" - $ref: "#/$defs/focus-metavariable" - $ref: "#/$defs/pattern-inside" @@ -264,6 +268,7 @@ $defs: items: anyOf: - $ref: "#/$defs/patterns" + - $ref: "#/$defs/semgrep-internal-patterns-allow-disjoint" - $ref: "#/$defs/pattern-either" - $ref: "#/$defs/pattern-inside" - $ref: "#/$defs/pattern" @@ -503,6 +508,8 @@ $defs: type: string patterns: $ref: "#/$defs/patterns-content" + semgrep-internal-patterns-allow-disjoint: + $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" required: @@ -514,6 +521,8 @@ $defs: anyOf: - required: - patterns + - required: + - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: @@ -524,6 +533,20 @@ $defs: anyOf: - required: - pattern + - required: + - semgrep-internal-patterns-allow-disjoint + - required: + - pattern-either + - required: + - pattern-regex + - required: + - semgrep-internal-patterns-allow-disjoint + not: + anyOf: + - required: + - pattern + - required: + - patterns - required: - pattern-either - required: @@ -536,6 +559,8 @@ $defs: - pattern - required: - patterns + - required: + - semgrep-internal-patterns-allow-disjoint - required: - pattern-regex - required: @@ -546,6 +571,8 @@ $defs: - pattern - required: - patterns + - required: + - semgrep-internal-patterns-allow-disjoint - required: - pattern-either additionalProperties: false @@ -640,6 +667,18 @@ $defs: required: - patterns additionalProperties: false + # EXPERIMENTAL + semgrep-internal-patterns-allow-disjoint: + type: object + properties: + semgrep-internal-patterns-allow-disjoint: + title: >- + Return finding where all of the nested conditions are true, without + respect to position of subpatterns. + $ref: "#/$defs/patterns-content" + required: + - semgrep-internal-patterns-allow-disjoint + additionalProperties: false pattern-either: type: object properties: @@ -871,6 +910,8 @@ properties: type: string patterns: $ref: "#/$defs/patterns-content" + semgrep-internal-patterns-allow-disjoint: + $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" # EXPERIMENTAL @@ -1041,6 +1082,8 @@ properties: - pattern - required: - patterns + - required: + - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: @@ -1051,6 +1094,8 @@ properties: - pattern - required: - patterns + - required: + - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: From d0367621b7faf7c22485ca63b5150e69639a678e Mon Sep 17 00:00:00 2001 From: Cooper Pierce Date: Wed, 8 Nov 2023 09:48:23 -0800 Subject: [PATCH 2/3] Rename based on discussion --- rule_schema_v1.yaml | 68 +++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/rule_schema_v1.yaml b/rule_schema_v1.yaml index 82575ac0..e429c2c0 100644 --- a/rule_schema_v1.yaml +++ b/rule_schema_v1.yaml @@ -132,6 +132,7 @@ $defs: - required: [ any ] - required: [ not ] - required: [ inside ] + - required: [ anywhere ] - required: [ taint ] properties: pattern: @@ -152,6 +153,8 @@ $defs: $ref: "#/$defs/new-pattern" inside: $ref: "#/$defs/new-pattern" + anywhere: + $ref: "#/$defs/new-pattern" where: type: array items: @@ -217,10 +220,10 @@ $defs: - required: [ pattern ] - required: [ pattern-regex ] - required: [ patterns ] - - required: [ semgrep-internal-patterns-allow-disjoint ] - required: [ pattern-either ] - required: [ pattern-not ] - required: [ pattern-inside ] + - required: [ semgrep-internal-pattern-anywhere ] - required: [ pattern-not-inside ] properties: pattern: @@ -229,14 +232,14 @@ $defs: type: string patterns: $ref: "#/$defs/patterns-content" - semgrep-internal-patterns-allow-disjoint: - $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" pattern-not: $ref: "#/$defs/general-pattern-content" pattern-inside: $ref: "#/$defs/general-pattern-content" + semgrep-internal-pattern-anywhere: + $ref: "#/$defs/general-pattern-content" pattern-not-inside: $ref: "#/$defs/general-pattern-content" else: @@ -247,10 +250,10 @@ $defs: items: anyOf: - $ref: "#/$defs/patterns" - - $ref: "#/$defs/semgrep-internal-patterns-allow-disjoint" - $ref: "#/$defs/pattern-either" - $ref: "#/$defs/focus-metavariable" - $ref: "#/$defs/pattern-inside" + - $ref: "#/$defs/semgrep-internal-pattern-anywhere" - $ref: "#/$defs/pattern-not-inside" - $ref: "#/$defs/pattern-not" - $ref: "#/$defs/pattern" @@ -268,9 +271,9 @@ $defs: items: anyOf: - $ref: "#/$defs/patterns" - - $ref: "#/$defs/semgrep-internal-patterns-allow-disjoint" - $ref: "#/$defs/pattern-either" - $ref: "#/$defs/pattern-inside" + - $ref: "#/$defs/semgrep-internal-pattern-anywhere" - $ref: "#/$defs/pattern" - $ref: "#/$defs/pattern-regex" taint-content: @@ -508,8 +511,6 @@ $defs: type: string patterns: $ref: "#/$defs/patterns-content" - semgrep-internal-patterns-allow-disjoint: - $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" required: @@ -521,8 +522,6 @@ $defs: anyOf: - required: - patterns - - required: - - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: @@ -533,20 +532,6 @@ $defs: anyOf: - required: - pattern - - required: - - semgrep-internal-patterns-allow-disjoint - - required: - - pattern-either - - required: - - pattern-regex - - required: - - semgrep-internal-patterns-allow-disjoint - not: - anyOf: - - required: - - pattern - - required: - - patterns - required: - pattern-either - required: @@ -559,8 +544,6 @@ $defs: - pattern - required: - patterns - - required: - - semgrep-internal-patterns-allow-disjoint - required: - pattern-regex - required: @@ -571,8 +554,6 @@ $defs: - pattern - required: - patterns - - required: - - semgrep-internal-patterns-allow-disjoint - required: - pattern-either additionalProperties: false @@ -667,18 +648,6 @@ $defs: required: - patterns additionalProperties: false - # EXPERIMENTAL - semgrep-internal-patterns-allow-disjoint: - type: object - properties: - semgrep-internal-patterns-allow-disjoint: - title: >- - Return finding where all of the nested conditions are true, without - respect to position of subpatterns. - $ref: "#/$defs/patterns-content" - required: - - semgrep-internal-patterns-allow-disjoint - additionalProperties: false pattern-either: type: object properties: @@ -709,6 +678,21 @@ $defs: required: - pattern-inside additionalProperties: false + # EXPERIMENTAL + semgrep-internal-pattern-anywhere: + type: object + properties: + semgrep-internal-pattern-anywhere: + title: >- + Marks this subpattern such that at a containing `patterns` or other + form of conjunction the range is not considered; subpattern matches + are instead combined solely on the basis of metavariables, without + respect to range, and the range of the matching subpattern is + discarded wholly. + $ref: "#/$defs/general-pattern-content" + required: + - semgrep-internal-pattern-anywhere + additionalProperties: false pattern-not-inside: type: object properties: @@ -910,8 +894,6 @@ properties: type: string patterns: $ref: "#/$defs/patterns-content" - semgrep-internal-patterns-allow-disjoint: - $ref: "#/$defs/patterns-content" pattern-either: $ref: "#/$defs/pattern-either-content" # EXPERIMENTAL @@ -1082,8 +1064,6 @@ properties: - pattern - required: - patterns - - required: - - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: @@ -1094,8 +1074,6 @@ properties: - pattern - required: - patterns - - required: - - semgrep-internal-patterns-allow-disjoint - required: - pattern-either - required: From a9de831f5824652d4a95b185ac5e561c108d04ea Mon Sep 17 00:00:00 2001 From: Cooper Pierce Date: Wed, 8 Nov 2023 14:21:47 -0800 Subject: [PATCH 3/3] Update matching explanations --- semgrep_output_v1.atd | 1 + semgrep_output_v1.jsonschema | 1 + semgrep_output_v1.proto | 2 +- semgrep_output_v1.py | 21 ++++++++++++++++++++- semgrep_output_v1.ts | 5 +++++ semgrep_output_v1_j.ml | 8 ++++++++ semgrep_output_v1_j.mli | 1 + 7 files changed, 37 insertions(+), 2 deletions(-) diff --git a/semgrep_output_v1.atd b/semgrep_output_v1.atd index 4cecf313..ba017b7a 100644 --- a/semgrep_output_v1.atd +++ b/semgrep_output_v1.atd @@ -685,6 +685,7 @@ type matching_operation = [ | And | Or | Inside + | Anywhere (* XPat for eXtended pattern. Can be a spacegrep pattern, a * regexp pattern, or a proper semgrep pattern. * see semgrep-core/src/core/XPattern.ml diff --git a/semgrep_output_v1.jsonschema b/semgrep_output_v1.jsonschema index 24c24860..57903803 100644 --- a/semgrep_output_v1.jsonschema +++ b/semgrep_output_v1.jsonschema @@ -479,6 +479,7 @@ { "const": "And" }, { "const": "Or" }, { "const": "Inside" }, + { "const": "Anywhere" }, { "type": "array", "minItems": 2, diff --git a/semgrep_output_v1.proto b/semgrep_output_v1.proto index 84c0527e..3d134df8 100644 --- a/semgrep_output_v1.proto +++ b/semgrep_output_v1.proto @@ -1,6 +1,6 @@ // Generated by jsonschema2protobuf. DO NOT EDIT! // Source file: semgrep_output_v1.jsonschema -// Source file sha256 digest: 1351efaf61282cfef70c8d2a62674f6ddd029d3939abe4ab9981a24935ec81e7 +// Source file sha256 digest: caeb7ef7bba1a9fa85c84d7a3673278a490e1023c6aa636466fd1888cf4a9f44 syntax = "proto3"; diff --git a/semgrep_output_v1.py b/semgrep_output_v1.py index 017936f1..1d38940a 100644 --- a/semgrep_output_v1.py +++ b/semgrep_output_v1.py @@ -521,6 +521,23 @@ def to_json_string(self, **kw: Any) -> str: return json.dumps(self.to_json(), **kw) +@dataclass +class Anywhere: + """Original type: matching_operation = [ ... | Anywhere | ... ]""" + + @property + def kind(self) -> str: + """Name of the class representing this variant.""" + return 'Anywhere' + + @staticmethod + def to_json() -> Any: + return 'Anywhere' + + def to_json_string(self, **kw: Any) -> str: + return json.dumps(self.to_json(), **kw) + + @dataclass class XPat: """Original type: matching_operation = [ ... | XPat of ... | ... ]""" @@ -680,7 +697,7 @@ def to_json_string(self, **kw: Any) -> str: class MatchingOperation: """Original type: matching_operation = [ ... ]""" - value: Union[And, Or, Inside, XPat, Negation, Filter, Taint, TaintSource, TaintSink, TaintSanitizer, EllipsisAndStmts, ClassHeaderAndElems] + value: Union[And, Or, Inside, Anywhere, XPat, Negation, Filter, Taint, TaintSource, TaintSink, TaintSanitizer, EllipsisAndStmts, ClassHeaderAndElems] @property def kind(self) -> str: @@ -696,6 +713,8 @@ def from_json(cls, x: Any) -> 'MatchingOperation': return cls(Or()) if x == 'Inside': return cls(Inside()) + if x == 'Anywhere': + return cls(Anywhere()) if x == 'Negation': return cls(Negation()) if x == 'Taint': diff --git a/semgrep_output_v1.ts b/semgrep_output_v1.ts index 5290dd9d..cb99ab36 100644 --- a/semgrep_output_v1.ts +++ b/semgrep_output_v1.ts @@ -283,6 +283,7 @@ export type MatchingOperation = | { kind: 'And' } | { kind: 'Or' } | { kind: 'Inside' } +| { kind: 'Anywhere' } | { kind: 'XPat'; value: string } | { kind: 'Negation' } | { kind: 'Filter'; value: string } @@ -1493,6 +1494,8 @@ export function writeMatchingOperation(x: MatchingOperation, context: any = x): return 'Or' case 'Inside': return 'Inside' + case 'Anywhere': + return 'Anywhere' case 'XPat': return ['XPat', _atd_write_string(x.value, x)] case 'Negation': @@ -1523,6 +1526,8 @@ export function readMatchingOperation(x: any, context: any = x): MatchingOperati return { kind: 'Or' } case 'Inside': return { kind: 'Inside' } + case 'Anywhere': + return { kind: 'Anywhere' } case 'Negation': return { kind: 'Negation' } case 'Taint': diff --git a/semgrep_output_v1_j.ml b/semgrep_output_v1_j.ml index fe0fef2c..477711b8 100644 --- a/semgrep_output_v1_j.ml +++ b/semgrep_output_v1_j.ml @@ -12,6 +12,7 @@ type matching_operation = Semgrep_output_v1_t.matching_operation = And | Or | Inside + | Anywhere | XPat of string | Negation | Filter of string @@ -868,6 +869,7 @@ let write_matching_operation : _ -> matching_operation -> _ = ( | And -> Buffer.add_string ob "\"And\"" | Or -> Buffer.add_string ob "\"Or\"" | Inside -> Buffer.add_string ob "\"Inside\"" + | Anywhere -> Buffer.add_string ob "\"Anywhere\"" | XPat x -> Buffer.add_string ob "[\"XPat\","; ( @@ -910,6 +912,10 @@ let read_matching_operation = ( Yojson.Safe.read_space p lb; Yojson.Safe.read_gt p lb; (Inside : matching_operation) + | "Anywhere" -> + Yojson.Safe.read_space p lb; + Yojson.Safe.read_gt p lb; + (Anywhere : matching_operation) | "XPat" -> Atdgen_runtime.Oj_run.read_until_field_value p lb; let x = ( @@ -967,6 +973,8 @@ let read_matching_operation = ( (Or : matching_operation) | "Inside" -> (Inside : matching_operation) + | "Anywhere" -> + (Anywhere : matching_operation) | "Negation" -> (Negation : matching_operation) | "Taint" -> diff --git a/semgrep_output_v1_j.mli b/semgrep_output_v1_j.mli index 35e76ffd..4c8a1fff 100644 --- a/semgrep_output_v1_j.mli +++ b/semgrep_output_v1_j.mli @@ -12,6 +12,7 @@ type matching_operation = Semgrep_output_v1_t.matching_operation = And | Or | Inside + | Anywhere | XPat of string | Negation | Filter of string