Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make project_metadata mandatory in scan_request #316

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- [ ] I ran `make setup && make` to update the generated code after editing a `.atd` file (TODO: have a CI check)
- [ ] I made sure we're still backward compatible with old versions of the CLI.
For example, the Semgrep backend still needs to be able to *consume* data generated
by Semgrep 1.17.0.
by Semgrep 1.50.0.
See https://atd.readthedocs.io/en/latest/atdgen-tutorial.html#smooth-protocol-upgrades
144 changes: 81 additions & 63 deletions semgrep_output_v1.atd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* This file specifies the JSON formats of:
* - the output of the 'semgrep scan --json' command
* - the output of the 'semgrep test --json' command
* - the messages sent (and received) to the Semgrep backend by the
* - the messages exchanged with the Semgrep backend by the
* 'semgrep ci' command
*
* There are other important forms of output which are not specified here:
Expand Down Expand Up @@ -77,7 +77,10 @@ type raw_json <ocaml module="Yojson.Basic" t="t"> = abstract
* less: could convert directly to Path class of pathlib library for Python
* See libs/commons/ATD_string_wrap.ml for more info on those ATD_string_wrap.
*)
type fpath <ocaml attr="deriving show, eq"> <python decorator="dataclass(frozen=True)">= string wrap <ocaml module="ATD_string_wrap.Fpath">
type fpath
<ocaml attr="deriving show, eq">
<python decorator="dataclass(frozen=True)"> =
string wrap <ocaml module="ATD_string_wrap.Fpath">

type uri = string wrap <ocaml module="ATD_string_wrap.Uri">

Expand All @@ -104,8 +107,10 @@ type position
col: int; (* starts from 1 *)
(* Byte position from the beginning of the file, starts at 0.
* OCaml code sets it correctly. Python code sets it to a dummy value (-1).
* This uses '~' because semgrep < 1.30? was *producing* positions without
* This uses '~' because pysemgrep < 1.30? was *producing* positions without
* offset sometimes, and we want the backend to still *consume* such positions.
* Note that pysemgrep 1.97 was still producing dummy positions without
* an offset so we might need this ~offset longer than expected?
*)
~offset: int;
}
Expand Down Expand Up @@ -143,8 +148,8 @@ type rule_id
coupling: with 'severity' in 'rule_schema_v2.atd'
*)
type match_severity
<ocaml attr="deriving show, eq">
<python decorator="dataclass(frozen=True)"> = [
<ocaml attr="deriving show, eq">
<python decorator="dataclass(frozen=True)"> = [
| Error <json name="ERROR">
| Warning <json name="WARNING">
| Experiment <json name="EXPERIMENT">
Expand Down Expand Up @@ -173,8 +178,8 @@ type match_severity
alt: could reuse match_severity but seems cleaner to define its own type
*)
type error_severity
<ocaml attr="deriving show, eq">
<python decorator="dataclass(frozen=True)"> = [
<ocaml attr="deriving show, eq">
<python decorator="dataclass(frozen=True)"> = [
| Error <json name="error">
| Warning <json name="warn">
| Info <json name="info">
Expand All @@ -187,11 +192,11 @@ type error_severity
Interfile_taint = requires interfile taint
Other_pro_feature = requires some non-taint pro feature *)
type pro_feature
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = {
interproc_taint: bool;
interfile_taint: bool;
proprietary_language: bool;
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = {
interproc_taint: bool;
interfile_taint: bool;
proprietary_language: bool;
}

(* Report the engine used to detect each finding. Additionally, if we are able
Expand All @@ -208,17 +213,17 @@ type pro_feature
we're leaving them as is
*)
type engine_of_finding
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = [
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = [
| OSS
| PRO
(* Semgrep 1.64.0 or later *)
| PRO_REQUIRED of pro_feature
]

type engine_kind
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = [
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True)"> = [
| OSS
| PRO
]
Expand Down Expand Up @@ -466,8 +471,8 @@ type match_intermediate_var <python decorator="dataclass(frozen=True)"> = {
* Error.string_of_error_type() for osemgrep.
*)
type error_type
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True, order=True)"> = [
<ocaml attr="deriving show">
<python decorator="dataclass(frozen=True, order=True)"> = [
(* File parsing related errors;
coupling: if you add a target parse error then metrics for
cli need to be updated. See cli/src/semgrep/parsing_data.py.
Expand Down Expand Up @@ -553,7 +558,7 @@ type core_error <python decorator="dataclass(frozen=True)"> = {
?details: string option;
?location: location option;
?rule_id: rule_id option;
}
}

(* ----------------------------- *)
(* CLI *)
Expand Down Expand Up @@ -869,8 +874,8 @@ type cli_output_extra = {
*)
?explanations: matching_explanation list option;

(* These rules, classified by engine used, will let us be transparent in the CLI
* output over what rules were run with what.
(* These rules, classified by the engine used, let us be transparent in
* the CLI output about which rules were run with which engine.
* EXPERIMENTAL: since: 1.11.0
*)
?rules_by_engine: rule_id_and_engine_kind list option;
Expand Down Expand Up @@ -1060,6 +1065,7 @@ type todo = int
(* This is also known as Software Composition Analysis (SCA) *)

(* EXPERIMENTAL *)
(* part of cli_match_extra *)
type sca_info = {
reachable: bool;
reachability_rule: bool;
Expand All @@ -1073,11 +1079,19 @@ type dependency_match = {
lockfile: string;
}

type dependency_pattern = {
ecosystem: ecosystem;
package: string;
semver_range: string;
}

(* both ecosystem and transitivity below have frozen=True so the generated
* classes can be hashed and put in sets (see calls to reachable_deps.add()
* in semgrep SCA code)
*)
type ecosystem <python decorator="dataclass(frozen=True)"> <ocaml attr="deriving show,eq"> = [
type ecosystem
<python decorator="dataclass(frozen=True)">
<ocaml attr="deriving show,eq"> = [
| Npm <json name="npm">
| Pypi <json name="pypi">
| Gem <json name="gem">
Expand All @@ -1093,23 +1107,6 @@ type ecosystem <python decorator="dataclass(frozen=True)"> <ocaml attr="deriving
| Hex <json name="hex">
]

type transitivity <python decorator="dataclass(frozen=True)"> <ocaml attr="deriving show,eq"> = [
| Direct <json name="direct">
| Transitive <json name="transitive">
| Unknown <json name="unknown">
]

type dependency_pattern = {
ecosystem: ecosystem;
package: string;
semver_range: string;
}

type dependency_child <python decorator="dataclass(frozen=True)"> = {
package: string;
version: string;
}

type found_dependency = {
package: string;
version: string;
Expand Down Expand Up @@ -1146,7 +1143,35 @@ type found_dependency = {
?git_ref: string option;
}

(* json names are to maintain backwards compatibility with the python enum it is replacing *)
type transitivity
<python decorator="dataclass(frozen=True)">
<ocaml attr="deriving show,eq"> = [
| Direct <json name="direct">
| Transitive <json name="transitive">
| Unknown <json name="unknown">
]


type dependency_child <python decorator="dataclass(frozen=True)"> = {
package: string;
version: string;
}

(* Used in ci_scan_complete *)
type dependency_parser_error = {
path: string;
parser: sca_parser_name;
reason: string;
(* Not using `position` because this type must be backwards compatible with the python
* class it is replacing.
*)
?line: int option;
?col: int option;
?text: string option;
}

(* json names are to maintain backwards compatibility with the python enum it is
* replacing *)
type sca_parser_name = [
| Gemfile_lock <json name="gemfile_lock">
| Go_mod <json name="go_mod">
Expand All @@ -1170,17 +1195,6 @@ type sca_parser_name = [
| Mix_lock <json name="mix_lock">
]

type dependency_parser_error = {
path: string;
parser: sca_parser_name;
reason: string;
(* Not using `position` because this type must be backwards compatible with the python
* class it is replacing.
*)
?line: int option;
?col: int option;
?text: string option;
}

(*****************************************************************************)
(* Semgrep Secrets *)
Expand All @@ -1204,8 +1218,8 @@ type historical_info = {
(*****************************************************************************)

(* EXPERIMENTAL: do not rely on the types in this section; those are internal
* types used to communicate with the Semgrep backend and are not meant
* to be consumed directly by Semgrep users or tools wrapping up Semgrep.
* types used to communicate with the Semgrep App backend and are not meant
* to be consumed directly by Semgrep users or tools wrapping Semgrep.
*
* The sequence of HTTP requests is mostly:
* - /deployments/current with token
Expand Down Expand Up @@ -1293,19 +1307,19 @@ type deployment_config <ocaml attr="deriving show"> = {
inherit has_features;
}

(* Content of the .semgrepconfig.yml in the repository.
(* Content of a possible .semgrepconfig.yml in the repository.
*
* This config allows configuring Semgrep per repo, e.g., to store
* a category/tag like "webapp" in a repo so that the Semgrep WebApp can
* return a set of relevant rules automatically for this repo in scan_config
* later when given this ci_config_from_repo in the scan_request.
*)
type ci_config_from_repo = {
(* version of the .semgrepconfig.yml format. "V1" right now (useful?) *)
(* version of the .semgrepconfig.yml format. "v1" right now (useful?) *)
~version <python default="Version('v1')"> <ts default="'v1'">: version;
?tags: tag list option;
}
(* ?? ex? *)
(* ex: "webapp" *)
type tag = string

(* Response by the backend to the CLI to the POST /scans/<scan_id>/config *)
Expand All @@ -1319,7 +1333,7 @@ type scan_config = {
policy_names: string list;
(* rules raw content in JSON format (but still sent as a string) *)
rule_config: string;
(* since 1.47.0 *)
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;

(* Deprecated: should rely on ci_config_from_cloud instead *)
Expand Down Expand Up @@ -1441,12 +1455,15 @@ type scan_metadata = {

(* Sent by the CLI to the POST /api/cli/scans to create a scan. *)
type scan_request = {
(* until 1.43ish, was 'meta: project_metadata;' before *)
meta: raw_json;
(* from 1.43 *)
?project_metadata: project_metadata option; (* replacing meta *)
(* added in 1.43 as options, and mandatory since 1.98.0 (replacing meta) *)
project_metadata: project_metadata;
scan_metadata: scan_metadata;
(* added in 1.43 (used to be in meta) *)
?project_config: ci_config_from_repo option;
?scan_metadata: scan_metadata option;
(* deprecated: made optional in 1.98.0; was in use until 1.43ish
* old: 'meta: project_metadata;' before 1.43
*)
?meta: raw_json option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans *)
Expand Down Expand Up @@ -1647,7 +1664,8 @@ type ci_scan_results_response_error <ocaml attr="deriving show"> = {
type ci_scan_complete = {
exit_code: int;
stats: ci_scan_complete_stats;
?dependencies: ci_scan_dependencies option; (* remove when min version is 1.38.0 *)
(* TODO: remove dependencies when min version is 1.38.0 *)
?dependencies: ci_scan_dependencies option;
?dependency_parser_errors: dependency_parser_error list option;
(* since 1.31.0 *)
?task_id: string option;
Expand Down
Loading
Loading