diff --git a/.env.example b/.env.example index e576e65..ad67394 100644 --- a/.env.example +++ b/.env.example @@ -4,18 +4,43 @@ DATABASE_URL=postgresql://postgres:postgres@localhost:5432/zenvra # ─── Cache / Queue ──────────────────────────────────────────────────────────── REDIS_URL=redis://localhost:6379 -# ─── AI (Anthropic) ─────────────────────────────────────────────────────────── -# Get yours at: https://console.anthropic.com -ANTHROPIC_API_KEY=sk-ant-... +# ─── AI Provider ────────────────────────────────────────────────────────────── +# Zenvra supports multiple AI providers for explanations and fix generation. +# Supported providers: anthropic, openai, google, custom +# +# For "custom" provider, AI_ENDPOINT is required. +# For built-in providers, AI_ENDPOINT is optional (overrides default). +# +# OpenAI-compatible APIs (Groq, Together, Fireworks, Ollama, vLLM, LiteLLM) +# can be used via the "custom" provider with the appropriate endpoint. + +AI_PROVIDER=anthropic +AI_API_KEY=sk-ant-... +AI_MODEL=claude-sonnet-4-20250514 +AI_ENDPOINT= + +# Examples for other providers: +# AI_PROVIDER=openai +# AI_API_KEY=sk-... +# AI_MODEL=gpt-4o +# +# AI_PROVIDER=google +# AI_API_KEY=AIza... +# AI_MODEL=gemini-2.0-flash +# +# AI_PROVIDER=custom +# AI_API_KEY= # leave empty for local models (e.g. 
Ollama) +# AI_MODEL=llama3 +# AI_ENDPOINT=http://localhost:11434 # ─── CVE Data Feeds ─────────────────────────────────────────────────────────── # NVD API key (free): https://nvd.nist.gov/developers/request-an-api-key NVD_API_KEY= -# ─── Auth (NextAuth.js) ─────────────────────────────────────────────────────── +# ─── Auth (SvelteKit) ──────────────────────────────────────────────────────── # Generate with: openssl rand -base64 32 -NEXTAUTH_SECRET=change-me-in-production -NEXTAUTH_URL=http://localhost:3000 +AUTH_SECRET=change-me-in-production +AUTH_URL=http://localhost:5173 # GitHub OAuth (create at: github.com/settings/applications/new) GITHUB_CLIENT_ID= @@ -33,5 +58,4 @@ STRIPE_PRO_PRICE_ID=price_... STRIPE_TEAM_PRICE_ID=price_... # ─── App ────────────────────────────────────────────────────────────────────── -NEXT_PUBLIC_APP_URL=http://localhost:3000 ZENVRA_API_URL=http://localhost:8080 diff --git a/AGENTS.md b/AGENTS.md index 04e32c2..a89559e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,9 +22,9 @@ Zenvra (`zenvra.dev`) is an AI-powered code vulnerability scanner. It scans code ``` zenvra/ ├── apps/ -│ └── web/ # Next.js 15 frontend — scanner UI, dashboard, auth, billing +│ └── web/ # SvelteKit 5 frontend — scanner UI, dashboard, auth, billing ├── crates/ -│ ├── scanner/ # Rust core: SAST engine, SCA, secrets detection, CVE lookup +│ ├── scanner/ # Rust core: SAST engine, SCA, secrets detection, CVE lookup, AI provider layer │ └── cli/ # Rust CLI: `zenvra scan`, `zenvra report`, `zenvra auth` ├── extensions/ │ └── vscode/ # VS Code extension: inline diagnostics, hover fixes @@ -38,22 +38,59 @@ zenvra/ | Layer | Technology | Notes | |-------|-----------|-------| -| Frontend | Next.js 15, TypeScript, Tailwind CSS | App Router. No Pages Router. | -| UI components | shadcn/ui | Installed in apps/web/components/ui | +| Frontend | SvelteKit 5, TypeScript, Tailwind CSS v4 | File-based routing. Svelte 5 runes syntax. 
| | Backend API | Rust, Axum | REST + SSE for streaming scan results | | Scan engine | Rust, Semgrep (via subprocess) | Custom rules in crates/scanner/rules/ | -| Secrets detection | Rust, Gitleaks patterns | Compiled regex patterns | -| AI explanations | Anthropic Claude API (claude-sonnet-4-20250514) | For CVE explanations and fix generation only | +| Secrets detection | Rust, compiled regex patterns | Gitleaks-inspired patterns | +| AI explanations | Multi-provider (Anthropic, OpenAI, Google, custom) | Bring-your-own-key supported. See AI Provider section. | | CVE database | NVD + OSV + GitHub Advisory DB | Synced daily via cron in scripts/sync-cve.sh | -| Database | PostgreSQL 16 | Diesel ORM in Rust, Prisma in Next.js | +| Database | PostgreSQL 16, sqlx | Compile-time checked async queries | | Cache / Queue | Redis 7 | Scan jobs via a simple queue pattern | -| Auth | NextAuth.js v5 | GitHub + Google OAuth + email magic link | +| Auth | TBD (SvelteKit-based) | GitHub + Google OAuth | | Payments | Stripe | Subscription billing | | CLI | Rust, Clap v4 | Produces single static binary | | VS Code ext | TypeScript, VS Code Extension API | LSP-style diagnostics | --- +## AI Provider System + +Zenvra supports multiple AI providers for generating vulnerability explanations and fix suggestions. Users can bring their own API key and even configure custom endpoints. + +### Supported Providers + +| Provider | Models | Notes | +|----------|--------|-------| +| Anthropic | claude-sonnet-4-20250514, etc. | Default provider | +| OpenAI | gpt-4o, gpt-4o-mini, etc. | Also works for OpenAI-compatible APIs (Groq, Together, etc.) | +| Google | gemini-2.0-flash, etc. | Gemini generateContent API | +| Custom | User-defined | Any endpoint with OpenAI-compatible API format | + +### Configuration + +```env +AI_PROVIDER=anthropic # anthropic | openai | google | custom +AI_API_KEY=sk-ant-... 
# API key for the chosen provider +AI_MODEL=claude-sonnet-4-20250514 # Model identifier +AI_ENDPOINT= # Only needed for custom provider or non-default endpoints +``` + +### Architecture + +The `AiProvider` trait in `crates/scanner/src/ai/` defines the interface: + +```rust +#[async_trait] +pub trait AiProvider: Send + Sync { + async fn explain(&self, finding: &RawFinding) -> Result<String>; + async fn generate_fix(&self, finding: &RawFinding) -> Result<String>; +} +``` + +Each provider (`AnthropicProvider`, `OpenAiProvider`, `GoogleProvider`, `CustomProvider`) implements this trait. Provider selection is config-driven via `AiConfig`. + +--- + ## Coding Rules — Always Follow These ### Rust @@ -65,11 +102,10 @@ zenvra/ - Tests in `#[cfg(test)]` modules at bottom of each file - No `unsafe` without a comment explaining exactly why it's safe -### TypeScript / Next.js +### TypeScript / SvelteKit - TypeScript strict mode is ON — no `any`, no `@ts-ignore` -- Named exports everywhere except Next.js page components -- Server Components by default; add `"use client"` only when needed -- API routes live in `apps/web/src/app/api/` +- Named exports everywhere except SvelteKit page/layout components +- Use Svelte 5 runes syntax (`$state`, `$derived`, `$effect`, `$props`) - No secrets or API keys ever in client-side code - All fetch calls go through typed API client functions in `apps/web/src/lib/api.ts` - Components max 200 lines — split into smaller ones if larger @@ -93,11 +129,11 @@ API validates input + queues scan job (Redis) Rust scanner worker picks up job: ├── SAST: run Semgrep with Zenvra ruleset ├── SCA: parse dependency files → query OSV/NVD API - └── Secrets: scan with Gitleaks regex patterns + └── Secrets: scan with compiled regex patterns ↓ Raw findings → CVE lookup (local DB + NVD fallback) ↓ -Claude API: generate plain-English explanation + corrected code +AI Provider: generate plain-English explanation + corrected code ↓ Results stored in PostgreSQL, streamed to client via SSE ↓ 
@@ -109,18 +145,16 @@ User sees: severity badge + CVE ID + explanation + fix + shareable card ## Key Domain Types (Rust) ```rust -pub struct ScanJob { - pub id: Uuid, +pub struct ScanConfig { pub code: String, pub language: Language, - pub engines: Vec, // Sast, Sca, Secrets - pub created_at: DateTime, + pub engines: Vec, + pub ai_config: Option, } pub struct Finding { pub id: Uuid, - pub scan_id: Uuid, - pub engine: ScanEngine, + pub engine: Engine, pub cve_id: Option, // e.g. "CVE-2025-12345" pub cwe_id: Option, // e.g. "CWE-89" pub severity: Severity, // Critical, High, Medium, Low, Info @@ -134,7 +168,7 @@ pub struct Finding { } pub enum Severity { Critical, High, Medium, Low, Info } -pub enum ScanEngine { Sast, Sca, Secrets, AiCode } +pub enum Engine { Sast, Sca, Secrets, AiCode } pub enum Language { Python, JavaScript, TypeScript, Rust, Go, Java, /* ... */ } ``` @@ -149,15 +183,18 @@ Required in `.env` (see `.env.example`): DATABASE_URL=postgresql://localhost:5432/zenvra REDIS_URL=redis://localhost:6379 -# AI -ANTHROPIC_API_KEY=sk-ant-... +# AI Provider (multi-provider — see AI Provider System section) +AI_PROVIDER=anthropic +AI_API_KEY=sk-ant-... +AI_MODEL=claude-sonnet-4-20250514 +AI_ENDPOINT= # CVE feeds NVD_API_KEY=... -# Auth (Next.js) -NEXTAUTH_SECRET=... -NEXTAUTH_URL=http://localhost:3000 +# Auth (SvelteKit) +AUTH_SECRET=change-me-in-production +AUTH_URL=http://localhost:5173 GITHUB_CLIENT_ID=... GITHUB_CLIENT_SECRET=... @@ -171,8 +208,8 @@ STRIPE_WEBHOOK_SECRET=whsec_... 
## What NOT to Do - Do NOT use `unwrap()` or `expect()` in library/API code -- Do NOT put business logic in React components — it goes in server actions or API routes -- Do NOT call the Claude API for anything other than explanation + fix generation (it's expensive) +- Do NOT put business logic in Svelte components — it goes in server-side load functions or API routes +- Do NOT call the AI API for anything other than explanation + fix generation (it's expensive) - Do NOT store raw code in the database longer than needed — scan results only - Do NOT add dependencies without discussion — keep the dependency tree lean - Do NOT break the existing API contract without a migration plan @@ -182,7 +219,7 @@ STRIPE_WEBHOOK_SECRET=whsec_... ## Current Status -This repository is in **initial setup phase**. The structure, CI, and issue templates are being established. No production code exists yet. First milestone: working web paste scanner (MVP). +This repository is in **active MVP development**. The scan engine foundation, multi-AI provider system, and secrets detection are being built. First milestone: working CLI scanner + web paste UI. When in doubt about a decision, open a GitHub Discussion rather than assuming. We build deliberately. diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 26e1231..cc5c8e4 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -20,3 +20,4 @@ tracing.workspace = true tracing-subscriber.workspace = true colored = "2" indicatif = "0.17" +walkdir = "2" diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 6bda798..b6341b5 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,6 +1,6 @@ //! Zenvra CLI — `zenvra scan`, `zenvra report`, and more. 
-use anyhow::Result; +use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; use std::path::PathBuf; @@ -23,17 +23,33 @@ enum Commands { /// Path to the file or directory to scan path: PathBuf, - /// Output format: text (default), json, sarif + /// Output format: text (default), json #[arg(short, long, default_value = "text")] output: String, /// Minimum severity to report: info, low, medium, high, critical - #[arg(short, long, default_value = "medium")] + #[arg(short, long, default_value = "low")] severity: String, /// Disable specific engines (comma-separated: sast,sca,secrets,ai_code) #[arg(long)] disable: Option, + + /// AI provider: anthropic, openai, google, custom + #[arg(long)] + ai_provider: Option, + + /// AI API key (or set AI_API_KEY env var) + #[arg(long)] + ai_key: Option, + + /// AI model name (e.g. claude-sonnet-4-20250514, gpt-4o, gemini-2.0-flash) + #[arg(long)] + ai_model: Option, + + /// AI endpoint URL (required for custom provider, optional for others) + #[arg(long)] + ai_endpoint: Option, }, /// Authenticate with zenvra.dev (required for private repos and unlimited scans) @@ -62,22 +78,47 @@ async fn main() -> Result<()> { let cli = Cli::parse(); match cli.command { - Commands::Scan { path, output, severity, disable } => { - cmd_scan(path, output, severity, disable).await + Commands::Scan { + path, + output, + severity, + disable, + ai_provider, + ai_key, + ai_model, + ai_endpoint, + } => { + cmd_scan( + path, + output, + severity, + disable, + ai_provider, + ai_key, + ai_model, + ai_endpoint, + ) + .await } Commands::Auth { token } => cmd_auth(token).await, Commands::Report { id } => cmd_report(id).await, } } +#[allow(clippy::too_many_arguments)] async fn cmd_scan( path: PathBuf, - _output: String, - _severity: String, - _disable: Option, + output: String, + min_severity: String, + disable: Option, + ai_provider: Option, + ai_key: Option, + ai_model: Option, + ai_endpoint: Option, ) -> Result<()> { use colored::Colorize; use 
indicatif::{ProgressBar, ProgressStyle}; + use zenvra_scanner::{Engine, Finding, Language, ScanConfig, Severity}; println!("{}", "⚡ Zenvra — scanning for vulnerabilities".bold()); println!(" Path: {}", path.display()); @@ -87,29 +128,318 @@ async fn cmd_scan( pb.set_style( ProgressStyle::default_spinner() .template("{spinner:.cyan} {msg}") - .unwrap(), + .expect("valid template"), ); - pb.set_message("Scanning..."); + pb.set_message("Reading files..."); pb.enable_steady_tick(std::time::Duration::from_millis(80)); - // TODO: read files from path, detect language, run scanner - // For now: placeholder - tokio::time::sleep(std::time::Duration::from_millis(500)).await; + // Determine which engines to run. + let disabled: Vec = disable + .unwrap_or_default() + .split(',') + .map(|s| s.trim().to_lowercase()) + .filter(|s| !s.is_empty()) + .collect(); + + let mut engines = Vec::new(); + if !disabled.contains(&"sast".to_string()) { + engines.push(Engine::Sast); + } + if !disabled.contains(&"sca".to_string()) { + engines.push(Engine::Sca); + } + if !disabled.contains(&"secrets".to_string()) { + engines.push(Engine::Secrets); + } + if !disabled.contains(&"ai_code".to_string()) { + engines.push(Engine::AiCode); + } + + // Parse minimum severity. + let min_sev = match min_severity.to_lowercase().as_str() { + "info" => Severity::Info, + "low" => Severity::Low, + "medium" => Severity::Medium, + "high" => Severity::High, + "critical" => Severity::Critical, + _ => Severity::Low, + }; + + // Build AI config if provider is specified. + let ai_config = build_ai_config(ai_provider, ai_key, ai_model, ai_endpoint)?; + + // Collect files to scan. + let files = collect_files(&path)?; + pb.set_message(format!("Scanning {} file(s)...", files.len())); + + // Run scan on each file. 
+    let mut all_findings: Vec<Finding> = Vec::new();
+
+    for (file_path, content) in &files {
+        let ext = file_path
+            .extension()
+            .and_then(|e| e.to_str())
+            .unwrap_or("");
+        let language = Language::from_extension(ext);
+
+        let config = ScanConfig {
+            code: content.clone(),
+            language,
+            engines: engines.clone(),
+            ai_config: ai_config.clone(),
+            file_path: Some(file_path.display().to_string()),
+        };
+
+        match zenvra_scanner::scan(&config).await {
+            Ok(mut findings) => all_findings.append(&mut findings),
+            Err(e) => {
+                tracing::warn!("Error scanning {}: {}", file_path.display(), e);
+            }
+        }
+    }
+
+    // Filter by minimum severity.
+    all_findings.retain(|f| f.severity >= min_sev);
+
+    // Sort by severity descending.
+    all_findings.sort_by(|a, b| b.severity.cmp(&a.severity));
 
     pb.finish_and_clear();
-    println!("{}", "✓ Scan complete".green().bold());
-    println!();
-    println!(" {} findings", "0".yellow());
-    println!();
-    println!(
-        " Run {} for the full scanner implementation.",
-        "zenvra auth".cyan()
-    );
+    // Output results.
+    match output.as_str() {
+        "json" => {
+            let json = serde_json::to_string_pretty(&all_findings)
+                .context("Failed to serialize findings")?;
+            println!("{json}");
+        }
+        _ => {
+            print_findings(&all_findings, files.len());
+        }
+    }
 
     Ok(())
 }
 
+/// Build AI config from CLI flags and environment variables.
+///
+/// Each setting is resolved as: CLI flag first, then the matching `AI_*`
+/// environment variable. Blank values (e.g. a variable exported as
+/// `AI_ENDPOINT=`) count as "not set", so they never shadow a provider's
+/// default model or endpoint.
+///
+/// Returns `Ok(None)` (AI enrichment disabled) when no provider is configured,
+/// or when `anthropic` / `openai` / `google` is selected without an API key;
+/// the latter case logs a warning. The `custom` provider is allowed to run
+/// without a key and receives an empty one.
+///
+/// # Errors
+/// Returns an error if the provider name is not one of
+/// `anthropic`, `openai`, `google`, `custom`.
+fn build_ai_config(
+    provider: Option<String>,
+    key: Option<String>,
+    model: Option<String>,
+    endpoint: Option<String>,
+) -> Result<Option<zenvra_scanner::ai::AiConfig>> {
+    use zenvra_scanner::ai::{AiConfig, ProviderKind};
+
+    /// First non-blank value among the CLI flag and the environment variable.
+    fn resolve(flag: Option<String>, env_var: &str) -> Option<String> {
+        flag.into_iter()
+            .chain(std::env::var(env_var).ok())
+            .map(|value| value.trim().to_string())
+            .find(|value| !value.is_empty())
+    }
+
+    // No provider at all means the user did not ask for AI output.
+    let Some(provider_str) = resolve(provider, "AI_PROVIDER") else {
+        return Ok(None);
+    };
+
+    // Validate the provider before looking at the key so a typo is reported
+    // instead of being hidden behind the "no key" early return.
+    let provider_kind = match provider_str.to_lowercase().as_str() {
+        "anthropic" => ProviderKind::Anthropic,
+        "openai" => ProviderKind::OpenAi,
+        "google" => ProviderKind::Google,
+        "custom" => ProviderKind::Custom,
+        other => anyhow::bail!("Unknown AI provider: {other}. Use: anthropic, openai, google, custom"),
+    };
+
+    let api_key = match (resolve(key, "AI_API_KEY"), &provider_kind) {
+        (Some(api_key), _) => api_key,
+        // Custom endpoints may be unauthenticated; pass an empty key through.
+        (None, ProviderKind::Custom) => String::new(),
+        (None, _) => {
+            tracing::warn!(
+                "AI provider {} is configured but no API key was supplied \
+                 (--ai-key or AI_API_KEY); AI explanations are disabled",
+                provider_kind
+            );
+            return Ok(None);
+        }
+    };
+
+    let model_name = resolve(model, "AI_MODEL").unwrap_or_else(|| {
+        match provider_kind {
+            ProviderKind::Anthropic => "claude-sonnet-4-20250514",
+            ProviderKind::OpenAi => "gpt-4o",
+            ProviderKind::Google => "gemini-2.0-flash",
+            ProviderKind::Custom => "default",
+        }
+        .to_string()
+    });
+
+    let endpoint_url = resolve(endpoint, "AI_ENDPOINT");
+
+    Ok(Some(AiConfig {
+        provider: provider_kind,
+        api_key,
+        model: model_name,
+        endpoint: endpoint_url,
+    }))
+}
+
+/// Collect all files from a path (file or directory), respecting common ignores.
+fn collect_files(path: &PathBuf) -> Result<Vec<(PathBuf, String)>> {
+    let mut files = Vec::new();
+
+    if path.is_file() {
+        let content =
+            std::fs::read_to_string(path).context(format!("Failed to read {}", path.display()))?;
+        files.push((path.clone(), content));
+    } else if path.is_dir() {
+        for entry in walkdir::WalkDir::new(path)
+            .into_iter()
+            .filter_entry(|e| {
+                let name = e.file_name().to_string_lossy();
+                // Skip common non-source directories.
+                !matches!(
+                    name.as_ref(),
+                    ".git" | "node_modules" | "target" | ".venv" | "__pycache__" | "dist" | "build"
+                )
+            })
+        {
+            let entry = entry?;
+            if entry.file_type().is_file() {
+                // Only scan text-like files by extension.
+ let ext = entry + .path() + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + if is_scannable_extension(ext) { + match std::fs::read_to_string(entry.path()) { + Ok(content) => files.push((entry.path().to_path_buf(), content)), + Err(_) => { + // Skip binary files silently. + } + } + } + } + } + } else { + anyhow::bail!("Path does not exist: {}", path.display()); + } + + Ok(files) +} + +/// Check if a file extension is one we should scan. +fn is_scannable_extension(ext: &str) -> bool { + matches!( + ext.to_lowercase().as_str(), + "py" | "js" + | "mjs" + | "cjs" + | "ts" + | "tsx" + | "jsx" + | "rs" + | "go" + | "java" + | "cs" + | "cpp" + | "cc" + | "c" + | "h" + | "rb" + | "php" + | "swift" + | "kt" + | "kts" + | "yaml" + | "yml" + | "toml" + | "json" + | "xml" + | "env" + | "sh" + | "bash" + | "zsh" + | "cfg" + | "ini" + | "conf" + | "properties" + | "tf" + | "hcl" + | "dockerfile" + | "svelte" + | "vue" + ) +} + +/// Pretty-print findings to the terminal. +fn print_findings(findings: &[zenvra_scanner::Finding], files_scanned: usize) { + use colored::Colorize; + + if findings.is_empty() { + println!("{}", "✓ No vulnerabilities found!".green().bold()); + println!(" Scanned {} file(s)", files_scanned); + return; + } + + for finding in findings { + let severity_badge = match finding.severity { + zenvra_scanner::Severity::Critical => "CRITICAL".on_red().white().bold(), + zenvra_scanner::Severity::High => "HIGH".on_truecolor(200, 80, 0).white().bold(), + zenvra_scanner::Severity::Medium => "MEDIUM".on_yellow().black().bold(), + zenvra_scanner::Severity::Low => "LOW".on_blue().white().bold(), + zenvra_scanner::Severity::Info => "INFO".on_white().black().bold(), + }; + + println!("{} — {}", severity_badge, finding.title.bold()); + + if let Some(ref file_path) = finding.file_path { + println!( + " {} line {}", + file_path.dimmed(), + finding.line_start.to_string().dimmed() + ); + } + + if let Some(ref cve) = finding.cve_id { + println!(" CVE: {}", 
cve.cyan()); + } + + println!(); + println!(" {}", finding.vulnerable_code.dimmed()); + println!(); + + if !finding.explanation.is_empty() { + println!(" {}", "What happened:".underline()); + println!(" {}", finding.explanation); + println!(); + } + + if !finding.fixed_code.is_empty() { + println!(" {}", "Fix:".underline()); + println!(" {}", finding.fixed_code.green()); + println!(); + } + + println!("{}", "─".repeat(60).dimmed()); + println!(); + } + + // Summary. + let critical = findings + .iter() + .filter(|f| f.severity == zenvra_scanner::Severity::Critical) + .count(); + let high = findings + .iter() + .filter(|f| f.severity == zenvra_scanner::Severity::High) + .count(); + let medium = findings + .iter() + .filter(|f| f.severity == zenvra_scanner::Severity::Medium) + .count(); + let low = findings + .iter() + .filter(|f| f.severity == zenvra_scanner::Severity::Low) + .count(); + + println!( + "Found {} issue(s) ({} critical · {} high · {} medium · {} low) scanning {} file(s)", + findings.len().to_string().yellow().bold(), + critical.to_string().red(), + high.to_string().truecolor(200, 80, 0), + medium.to_string().yellow(), + low.to_string().blue(), + files_scanned, + ); +} + async fn cmd_auth(_token: Option) -> Result<()> { println!("Opening zenvra.dev/cli-auth in your browser..."); println!("(Browser launch not yet implemented — coming in v0.2)"); diff --git a/crates/scanner/Cargo.toml b/crates/scanner/Cargo.toml index 769509f..d23f150 100644 --- a/crates/scanner/Cargo.toml +++ b/crates/scanner/Cargo.toml @@ -17,3 +17,4 @@ tracing.workspace = true reqwest.workspace = true regex = "1" walkdir = "2" +async-trait = "0.1" diff --git a/crates/scanner/src/ai/anthropic.rs b/crates/scanner/src/ai/anthropic.rs new file mode 100644 index 0000000..22dcdfe --- /dev/null +++ b/crates/scanner/src/ai/anthropic.rs @@ -0,0 +1,106 @@ +//! Anthropic (Claude) AI provider. +//! +//! Uses the Anthropic Messages API to generate explanations and fixes. 
+ +use super::{AiProvider, build_explain_prompt, build_fix_prompt}; +use crate::finding::RawFinding; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +/// Anthropic API provider using the Messages endpoint. +pub struct AnthropicProvider { + api_key: String, + model: String, + endpoint: String, + client: reqwest::Client, +} + +impl AnthropicProvider { + /// Create a new Anthropic provider. + pub fn new(api_key: String, model: String, endpoint: String) -> Self { + Self { + api_key, + model, + endpoint, + client: reqwest::Client::new(), + } + } +} + +#[derive(Serialize)] +struct MessagesRequest { + model: String, + max_tokens: u32, + messages: Vec, +} + +#[derive(Serialize)] +struct Message { + role: String, + content: String, +} + +#[derive(Deserialize)] +struct MessagesResponse { + content: Vec, +} + +#[derive(Deserialize)] +struct ContentBlock { + text: Option, +} + +impl AnthropicProvider { + /// Call the Anthropic Messages API. + async fn call(&self, prompt: &str) -> Result { + let body = MessagesRequest { + model: self.model.clone(), + max_tokens: 1024, + messages: vec![Message { + role: "user".to_string(), + content: prompt.to_string(), + }], + }; + + let response = self + .client + .post(format!("{}/v1/messages", self.endpoint)) + .header("x-api-key", &self.api_key) + .header("anthropic-version", "2023-06-01") + .header("content-type", "application/json") + .json(&body) + .send() + .await + .context("Failed to call Anthropic API")?; + + if !response.status().is_success() { + let status = response.status(); + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Anthropic API returned {status}: {text}"); + } + + let resp: MessagesResponse = response + .json() + .await + .context("Failed to parse Anthropic response")?; + + resp.content + .first() + .and_then(|block| block.text.clone()) + .ok_or_else(|| anyhow::anyhow!("Anthropic returned empty response")) + } +} + +#[async_trait] +impl 
AiProvider for AnthropicProvider { + async fn explain(&self, finding: &RawFinding) -> Result { + let prompt = build_explain_prompt(finding); + self.call(&prompt).await + } + + async fn generate_fix(&self, finding: &RawFinding) -> Result { + let prompt = build_fix_prompt(finding); + self.call(&prompt).await + } +} diff --git a/crates/scanner/src/ai/custom.rs b/crates/scanner/src/ai/custom.rs new file mode 100644 index 0000000..bfc8c8a --- /dev/null +++ b/crates/scanner/src/ai/custom.rs @@ -0,0 +1,135 @@ +//! Custom AI provider — user-configured endpoint. +//! +//! Assumes an OpenAI-compatible API format, which is the most common +//! protocol for self-hosted models (Ollama, vLLM, LiteLLM, etc.). + +use super::{AiProvider, build_explain_prompt, build_fix_prompt}; +use crate::finding::RawFinding; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +/// Custom AI provider for user-configured endpoints. +/// +/// Sends requests in OpenAI Chat Completions format to whatever +/// endpoint the user specifies. Works with Ollama, vLLM, LiteLLM, +/// and any OpenAI-compatible API. +pub struct CustomProvider { + api_key: String, + model: String, + endpoint: String, + client: reqwest::Client, +} + +impl CustomProvider { + /// Create a new custom provider. + pub fn new(api_key: String, model: String, endpoint: String) -> Self { + Self { + api_key, + model, + endpoint, + client: reqwest::Client::new(), + } + } +} + +#[derive(Serialize)] +struct ChatCompletionRequest { + model: String, + messages: Vec, + max_tokens: u32, +} + +#[derive(Serialize)] +struct ChatMessage { + role: String, + content: String, +} + +#[derive(Deserialize)] +struct ChatCompletionResponse { + choices: Vec, +} + +#[derive(Deserialize)] +struct Choice { + message: ResponseMessage, +} + +#[derive(Deserialize)] +struct ResponseMessage { + content: Option, +} + +impl CustomProvider { + /// Call the custom OpenAI-compatible endpoint. 
+ async fn call(&self, prompt: &str) -> Result { + let body = ChatCompletionRequest { + model: self.model.clone(), + messages: vec![ChatMessage { + role: "user".to_string(), + content: prompt.to_string(), + }], + max_tokens: 1024, + }; + + // The endpoint should be the base URL; we append /v1/chat/completions. + // If the user already includes /v1/ in their endpoint, strip trailing slashes. + let base = self.endpoint.trim_end_matches('/'); + let url = if base.ends_with("/v1") || base.ends_with("/v1/chat/completions") { + // User already specified a complete path. + if base.ends_with("/v1/chat/completions") { + base.to_string() + } else { + format!("{base}/chat/completions") + } + } else { + format!("{base}/v1/chat/completions") + }; + + let mut req = self + .client + .post(&url) + .header("Content-Type", "application/json"); + + // Only add auth header if api_key is non-empty. + if !self.api_key.is_empty() { + req = req.header("Authorization", format!("Bearer {}", self.api_key)); + } + + let response = req + .json(&body) + .send() + .await + .context("Failed to call custom AI endpoint")?; + + if !response.status().is_success() { + let status = response.status(); + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Custom AI endpoint returned {status}: {text}"); + } + + let resp: ChatCompletionResponse = response + .json() + .await + .context("Failed to parse custom AI response")?; + + resp.choices + .first() + .and_then(|c| c.message.content.clone()) + .ok_or_else(|| anyhow::anyhow!("Custom AI endpoint returned empty response")) + } +} + +#[async_trait] +impl AiProvider for CustomProvider { + async fn explain(&self, finding: &RawFinding) -> Result { + let prompt = build_explain_prompt(finding); + self.call(&prompt).await + } + + async fn generate_fix(&self, finding: &RawFinding) -> Result { + let prompt = build_fix_prompt(finding); + self.call(&prompt).await + } +} diff --git a/crates/scanner/src/ai/google.rs b/crates/scanner/src/ai/google.rs 
new file mode 100644 index 0000000..d255d5e --- /dev/null +++ b/crates/scanner/src/ai/google.rs @@ -0,0 +1,132 @@ +//! Google Gemini AI provider. +//! +//! Uses the Gemini `generateContent` API for vulnerability explanations and fixes. + +use super::{AiProvider, build_explain_prompt, build_fix_prompt}; +use crate::finding::RawFinding; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +/// Google Gemini API provider. +pub struct GoogleProvider { + api_key: String, + model: String, + endpoint: String, + client: reqwest::Client, +} + +impl GoogleProvider { + /// Create a new Google Gemini provider. + pub fn new(api_key: String, model: String, endpoint: String) -> Self { + Self { + api_key, + model, + endpoint, + client: reqwest::Client::new(), + } + } +} + +#[derive(Serialize)] +struct GenerateContentRequest { + contents: Vec, + #[serde(rename = "generationConfig")] + generation_config: GenerationConfig, +} + +#[derive(Serialize)] +struct Content { + parts: Vec, +} + +#[derive(Serialize)] +struct Part { + text: String, +} + +#[derive(Serialize)] +struct GenerationConfig { + #[serde(rename = "maxOutputTokens")] + max_output_tokens: u32, +} + +#[derive(Deserialize)] +struct GenerateContentResponse { + candidates: Option>, +} + +#[derive(Deserialize)] +struct Candidate { + content: CandidateContent, +} + +#[derive(Deserialize)] +struct CandidateContent { + parts: Vec, +} + +#[derive(Deserialize)] +struct CandidatePart { + text: Option, +} + +impl GoogleProvider { + /// Call the Gemini generateContent API. 
+    ///
+    /// Sends `prompt` as a single-part request capped at 1024 output tokens and
+    /// returns the text of the first part of the first candidate.
+    ///
+    /// # Errors
+    /// Fails if the request cannot be sent, the API answers with a non-success
+    /// status, the body cannot be parsed, or no candidate text is present.
+    async fn call(&self, prompt: &str) -> Result<String> {
+        let body = GenerateContentRequest {
+            contents: vec![Content {
+                parts: vec![Part {
+                    text: prompt.to_string(),
+                }],
+            }],
+            generation_config: GenerationConfig {
+                max_output_tokens: 1024,
+            },
+        };
+
+        // The API key is sent in the `x-goog-api-key` header rather than as a
+        // `?key=` query parameter: request URLs can end up in error messages
+        // and proxy/access logs, which would expose the key.
+        // A trailing slash on the endpoint is dropped so the path does not
+        // start with `//`.
+        let url = format!(
+            "{}/v1beta/models/{}:generateContent",
+            self.endpoint.trim_end_matches('/'),
+            self.model
+        );
+
+        let response = self
+            .client
+            .post(&url)
+            .header("x-goog-api-key", &self.api_key)
+            .header("Content-Type", "application/json")
+            .json(&body)
+            .send()
+            .await
+            .context("Failed to call Google Gemini API")?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let text = response.text().await.unwrap_or_default();
+            anyhow::bail!("Google Gemini API returned {status}: {text}");
+        }
+
+        let resp: GenerateContentResponse = response
+            .json()
+            .await
+            .context("Failed to parse Google Gemini response")?;
+
+        resp.candidates
+            .and_then(|c| c.into_iter().next())
+            .and_then(|c| c.content.parts.into_iter().next())
+            .and_then(|p| p.text)
+            .ok_or_else(|| anyhow::anyhow!("Google Gemini returned empty response"))
+    }
+}
+
+#[async_trait]
+impl AiProvider for GoogleProvider {
+    async fn explain(&self, finding: &RawFinding) -> Result<String> {
+        let prompt = build_explain_prompt(finding);
+        self.call(&prompt).await
+    }
+
+    async fn generate_fix(&self, finding: &RawFinding) -> Result<String> {
+        let prompt = build_fix_prompt(finding);
+        self.call(&prompt).await
+    }
+}
diff --git a/crates/scanner/src/ai/mod.rs b/crates/scanner/src/ai/mod.rs
new file mode 100644
index 0000000..f08fc78
--- /dev/null
+++ b/crates/scanner/src/ai/mod.rs
@@ -0,0 +1,164 @@
+//! Multi-provider AI system for generating vulnerability explanations and fixes.
+//!
+//! Supports Anthropic, OpenAI, Google Gemini, and custom OpenAI-compatible endpoints.
+//! Users can bring their own API keys.
+
+pub mod anthropic;
+pub mod custom;
+pub mod google;
+pub mod openai;
+
+use crate::finding::RawFinding;
+use anyhow::Result;
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+
+/// Supported AI provider types.
+///
+/// Serialized as `anthropic`, `openai`, `google` and `custom`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ProviderKind {
+    Anthropic,
+    // `snake_case` alone would turn this variant into "open_ai". Pin it to
+    // "openai" and keep accepting the old spelling when deserializing.
+    #[serde(rename = "openai", alias = "open_ai")]
+    OpenAi,
+    Google,
+    Custom,
+}
+
+impl std::fmt::Display for ProviderKind {
+    /// Write the human-readable provider name (e.g. "OpenAI").
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ProviderKind::Anthropic => write!(f, "Anthropic"),
+            ProviderKind::OpenAi => write!(f, "OpenAI"),
+            ProviderKind::Google => write!(f, "Google"),
+            ProviderKind::Custom => write!(f, "Custom"),
+        }
+    }
+}
+
+/// Configuration for an AI provider.
+///
+/// Supports bring-your-own-key: users pass their API key and optionally
+/// a custom endpoint URL for self-hosted or alternative providers.
+///
+/// `Debug` is implemented by hand so the API key is never printed.
+#[derive(Clone, Serialize, Deserialize)]
+pub struct AiConfig {
+    /// Which provider to use.
+    pub provider: ProviderKind,
+
+    /// API key for the provider.
+    pub api_key: String,
+
+    /// Model identifier (e.g. "claude-sonnet-4-20250514", "gpt-4o", "gemini-2.0-flash").
+    pub model: String,
+
+    /// Custom endpoint URL. Required for `Custom` provider, optional for others
+    /// (overrides default endpoint when set).
+    pub endpoint: Option<String>,
+}
+
+impl std::fmt::Debug for AiConfig {
+    /// Format every field except the key, which is replaced by a placeholder
+    /// that only reveals whether a key is present.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let key_state = if self.api_key.is_empty() {
+            "<empty>"
+        } else {
+            "<redacted>"
+        };
+        f.debug_struct("AiConfig")
+            .field("provider", &self.provider)
+            .field("api_key", &key_state)
+            .field("model", &self.model)
+            .field("endpoint", &self.endpoint)
+            .finish()
+    }
+}
+
+/// Trait for AI providers that generate vulnerability explanations and fixes.
+///
+/// Each provider (Anthropic, OpenAI, Google, Custom) implements this trait.
+/// The trait is object-safe so we can use `Box<dyn AiProvider>`.
+#[async_trait]
+pub trait AiProvider: Send + Sync {
+    /// Generate a plain-English explanation of a vulnerability finding.
+    async fn explain(&self, finding: &RawFinding) -> Result<String>;
+
+    /// Generate corrected code that fixes the vulnerability.
+    async fn generate_fix(&self, finding: &RawFinding) -> Result<String>;
+}
+
+/// Create an AI provider from configuration.
+/// +/// # Errors +/// Returns an error if the config is invalid (e.g. custom provider without endpoint). +pub fn create_provider(config: &AiConfig) -> Result> { + match config.provider { + ProviderKind::Anthropic => { + let endpoint = config + .endpoint + .clone() + .unwrap_or_else(|| "https://api.anthropic.com".to_string()); + Ok(Box::new(anthropic::AnthropicProvider::new( + config.api_key.clone(), + config.model.clone(), + endpoint, + ))) + } + ProviderKind::OpenAi => { + let endpoint = config + .endpoint + .clone() + .unwrap_or_else(|| "https://api.openai.com".to_string()); + Ok(Box::new(openai::OpenAiProvider::new( + config.api_key.clone(), + config.model.clone(), + endpoint, + ))) + } + ProviderKind::Google => { + let endpoint = config + .endpoint + .clone() + .unwrap_or_else(|| "https://generativelanguage.googleapis.com".to_string()); + Ok(Box::new(google::GoogleProvider::new( + config.api_key.clone(), + config.model.clone(), + endpoint, + ))) + } + ProviderKind::Custom => { + let endpoint = config + .endpoint + .clone() + .ok_or_else(|| anyhow::anyhow!("Custom provider requires an endpoint URL"))?; + Ok(Box::new(custom::CustomProvider::new( + config.api_key.clone(), + config.model.clone(), + endpoint, + ))) + } + } +} + +/// Build the system prompt used across all AI providers. +pub(crate) fn build_explain_prompt(finding: &RawFinding) -> String { + format!( + "You are a security expert explaining vulnerabilities to developers who may not have security experience.\n\n\ + Analyze this security finding and explain it in plain English:\n\n\ + **Title:** {title}\n\ + **Severity:** {severity}\n\ + {cve}\ + {cwe}\ + **Vulnerable code:**\n```\n{code}\n```\n\n\ + Explain:\n\ + 1. What the vulnerability is\n\ + 2. Why it's dangerous (real-world impact)\n\ + 3. How an attacker could exploit it\n\n\ + Keep it under 200 words. No jargon. 
Speak to a developer who built this with an AI tool and has no security background.", + title = finding.title, + severity = finding.severity, + cve = finding + .cve_id + .as_ref() + .map(|id| format!("**CVE:** {id}\n")) + .unwrap_or_default(), + cwe = finding + .cwe_id + .as_ref() + .map(|id| format!("**CWE:** {id}\n")) + .unwrap_or_default(), + code = finding.vulnerable_code, + ) +} + +/// Build the prompt for generating a fix. +pub(crate) fn build_fix_prompt(finding: &RawFinding) -> String { + format!( + "You are a security expert. Fix this vulnerable code.\n\n\ + **Title:** {title}\n\ + **Vulnerable code:**\n```\n{code}\n```\n\n\ + Return ONLY the corrected code. No explanation, no markdown fences, just the fixed code.", + title = finding.title, + code = finding.vulnerable_code, + ) +} diff --git a/crates/scanner/src/ai/openai.rs b/crates/scanner/src/ai/openai.rs new file mode 100644 index 0000000..ffc705f --- /dev/null +++ b/crates/scanner/src/ai/openai.rs @@ -0,0 +1,114 @@ +//! OpenAI-compatible AI provider. +//! +//! Works with OpenAI, Groq, Together, and any OpenAI-compatible API. +//! Users can override the endpoint to point at alternative providers. + +use super::{AiProvider, build_explain_prompt, build_fix_prompt}; +use crate::finding::RawFinding; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +/// OpenAI-compatible API provider. +/// +/// Works with native OpenAI and any API that implements the +/// Chat Completions endpoint (Groq, Together, Fireworks, etc.). +pub struct OpenAiProvider { + api_key: String, + model: String, + endpoint: String, + client: reqwest::Client, +} + +impl OpenAiProvider { + /// Create a new OpenAI-compatible provider. 
+ pub fn new(api_key: String, model: String, endpoint: String) -> Self { + Self { + api_key, + model, + endpoint, + client: reqwest::Client::new(), + } + } +} + +#[derive(Serialize)] +struct ChatCompletionRequest { + model: String, + messages: Vec, + max_tokens: u32, +} + +#[derive(Serialize)] +struct ChatMessage { + role: String, + content: String, +} + +#[derive(Deserialize)] +struct ChatCompletionResponse { + choices: Vec, +} + +#[derive(Deserialize)] +struct Choice { + message: ResponseMessage, +} + +#[derive(Deserialize)] +struct ResponseMessage { + content: Option, +} + +impl OpenAiProvider { + /// Call the OpenAI-compatible Chat Completions API. + async fn call(&self, prompt: &str) -> Result { + let body = ChatCompletionRequest { + model: self.model.clone(), + messages: vec![ChatMessage { + role: "user".to_string(), + content: prompt.to_string(), + }], + max_tokens: 1024, + }; + + let response = self + .client + .post(format!("{}/v1/chat/completions", self.endpoint)) + .header("Authorization", format!("Bearer {}", self.api_key)) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to call OpenAI API")?; + + if !response.status().is_success() { + let status = response.status(); + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("OpenAI API returned {status}: {text}"); + } + + let resp: ChatCompletionResponse = response + .json() + .await + .context("Failed to parse OpenAI response")?; + + resp.choices + .first() + .and_then(|c| c.message.content.clone()) + .ok_or_else(|| anyhow::anyhow!("OpenAI returned empty response")) + } +} + +#[async_trait] +impl AiProvider for OpenAiProvider { + async fn explain(&self, finding: &RawFinding) -> Result { + let prompt = build_explain_prompt(finding); + self.call(&prompt).await + } + + async fn generate_fix(&self, finding: &RawFinding) -> Result { + let prompt = build_fix_prompt(finding); + self.call(&prompt).await + } +} diff --git 
a/crates/scanner/src/engine.rs b/crates/scanner/src/engine.rs index d6131f1..d247ff6 100644 --- a/crates/scanner/src/engine.rs +++ b/crates/scanner/src/engine.rs @@ -1,66 +1,52 @@ -//! Scan engine orchestrator — runs all requested engines in parallel. +//! Scan engine orchestrator — runs all requested engines and merges results. + +use crate::{ScanConfig, finding::RawFinding}; +use serde::{Deserialize, Serialize}; + +/// Scan engines available in Zenvra. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Engine { + /// Static Application Security Testing — analyses source code patterns. + Sast, + /// Software Composition Analysis — checks dependency vulnerabilities. + Sca, + /// Detects hardcoded secrets, API keys, and credentials. + Secrets, + /// Patterns specific to AI/vibe-generated code. + AiCode, +} -use crate::{Engine, finding::Finding, language::Language}; +impl std::fmt::Display for Engine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Engine::Sast => write!(f, "SAST"), + Engine::Sca => write!(f, "SCA"), + Engine::Secrets => write!(f, "Secrets"), + Engine::AiCode => write!(f, "AI Code"), + } + } +} -/// Run all requested scan engines concurrently and merge results. -pub async fn run( - code: &str, - language: Language, - engines: &[Engine], -) -> anyhow::Result> { +/// Run all requested scan engines and merge results. +/// +/// Engines run sequentially for now; will be parallelised with `tokio::join!` +/// once individual engines are mature enough. +pub async fn run(config: &ScanConfig) -> anyhow::Result> { let mut findings = Vec::new(); - // TODO: Run engines concurrently with tokio::join! in future iterations. - // For now, sequential to keep the skeleton simple and testable. 
- for engine in engines { + for engine in &config.engines { let mut results = match engine { - Engine::Sast => sast::scan(code, &language).await?, - Engine::Sca => sca::scan(code, &language).await?, - Engine::Secrets => secrets::scan(code).await?, - Engine::AiCode => ai_code::scan(code, &language).await?, + Engine::Sast => crate::engines::sast::run(config).await?, + Engine::Sca => crate::engines::sca::run(config).await?, + Engine::Secrets => crate::engines::secrets::run(config).await?, + Engine::AiCode => crate::engines::ai_code::run(config).await?, }; findings.append(&mut results); } - // Sort by severity descending (critical first) + // Sort by severity descending (critical first). findings.sort_by(|a, b| b.severity.cmp(&a.severity)); Ok(findings) } - -// Engine sub-modules — each will grow into its own file as we implement them. -mod sast { - use crate::{finding::Finding, language::Language}; - - pub async fn scan(_code: &str, _language: &Language) -> anyhow::Result> { - // TODO: Implement Semgrep subprocess call - Ok(vec![]) - } -} - -mod sca { - use crate::{finding::Finding, language::Language}; - - pub async fn scan(_code: &str, _language: &Language) -> anyhow::Result> { - // TODO: Parse dependency files and query OSV API - Ok(vec![]) - } -} - -mod secrets { - use crate::finding::Finding; - - pub async fn scan(_code: &str) -> anyhow::Result> { - // TODO: Implement Gitleaks regex patterns - Ok(vec![]) - } -} - -mod ai_code { - use crate::{finding::Finding, language::Language}; - - pub async fn scan(_code: &str, _language: &Language) -> anyhow::Result> { - // TODO: AI-code specific pattern detection - Ok(vec![]) - } -} diff --git a/crates/scanner/src/engines/ai_code.rs b/crates/scanner/src/engines/ai_code.rs new file mode 100644 index 0000000..8b12250 --- /dev/null +++ b/crates/scanner/src/engines/ai_code.rs @@ -0,0 +1,15 @@ +//! AI code engine — patterns specific to AI/vibe-generated code. +//! +//! 
Detects common vulnerabilities introduced by AI code generators. + +use crate::{ScanConfig, finding::RawFinding}; +use anyhow::Result; + +/// Run AI-code-specific pattern detection. +/// +/// Returns a list of raw findings for AI-generated code anti-patterns. +pub async fn run(_config: &ScanConfig) -> Result> { + // TODO: implement AI-specific pattern detection + tracing::info!("AI code engine: not yet implemented"); + Ok(vec![]) +} diff --git a/crates/scanner/src/engines/mod.rs b/crates/scanner/src/engines/mod.rs index c2a7b2a..980b264 100644 --- a/crates/scanner/src/engines/mod.rs +++ b/crates/scanner/src/engines/mod.rs @@ -1,3 +1,6 @@ +//! Scan engine sub-modules. + +pub mod ai_code; pub mod sast; pub mod sca; pub mod secrets; diff --git a/crates/scanner/src/engines/sast.rs b/crates/scanner/src/engines/sast.rs index 621b46e..661569a 100644 --- a/crates/scanner/src/engines/sast.rs +++ b/crates/scanner/src/engines/sast.rs @@ -1,14 +1,15 @@ //! SAST engine — static application security testing. +//! //! Wraps Semgrep and custom Zenvra rules. -//! See issue #2 for implementation details. -use crate::{Finding, ScanConfig}; +use crate::{ScanConfig, finding::RawFinding}; use anyhow::Result; -/// Run SAST analysis against the target in `config`. -/// Returns a list of findings sorted by severity (highest first). -pub async fn run(_config: &ScanConfig) -> Result> { - // TODO (#2): implement Semgrep subprocess integration +/// Run SAST analysis against the code in `config`. +/// +/// Returns a list of raw findings sorted by severity (highest first). +pub async fn run(_config: &ScanConfig) -> Result> { + // TODO: implement Semgrep subprocess integration tracing::info!("SAST engine: not yet implemented"); Ok(vec![]) } diff --git a/crates/scanner/src/engines/sca.rs b/crates/scanner/src/engines/sca.rs index 1afa626..8e28061 100644 --- a/crates/scanner/src/engines/sca.rs +++ b/crates/scanner/src/engines/sca.rs @@ -1,11 +1,15 @@ //! SCA engine — software composition analysis. 
+//! //! Parses lockfiles and queries OSV/NVD for known CVEs. -//! See issue #4 for implementation details. -use crate::{Finding, ScanConfig}; +use crate::{ScanConfig, finding::RawFinding}; use anyhow::Result; -pub async fn run(_config: &ScanConfig) -> Result> { +/// Run SCA analysis — parse dependency files and check for known vulnerabilities. +/// +/// Returns a list of raw findings for vulnerable dependencies. +pub async fn run(_config: &ScanConfig) -> Result> { + // TODO: parse lockfiles (Cargo.lock, package-lock.json, etc.) and query OSV API tracing::info!("SCA engine: not yet implemented"); Ok(vec![]) } diff --git a/crates/scanner/src/engines/secrets.rs b/crates/scanner/src/engines/secrets.rs index 937418b..fe71742 100644 --- a/crates/scanner/src/engines/secrets.rs +++ b/crates/scanner/src/engines/secrets.rs @@ -1,10 +1,295 @@ //! Secrets scanner — detects API keys, tokens, and credentials in code. -//! See issue #3 for implementation details. +//! +//! Uses compiled regex patterns inspired by Gitleaks to detect hardcoded +//! secrets across all languages. Each pattern maps to a severity and +//! descriptive name for clear reporting. -use crate::{Finding, ScanConfig}; +use crate::engine::Engine; +use crate::finding::{RawFinding, Severity}; +use crate::ScanConfig; use anyhow::Result; +use regex::Regex; -pub async fn run(_config: &ScanConfig) -> Result> { - tracing::info!("Secrets engine: not yet implemented"); - Ok(vec![]) +/// A pattern for detecting a specific type of secret. +struct SecretPattern { + /// Human-friendly name (e.g. "AWS Access Key"). + name: &'static str, + /// Compiled regex to match against each line. + regex: Regex, + /// Severity of this type of secret exposure. + severity: Severity, + /// CWE identifier for hardcoded credentials. + cwe_id: &'static str, +} + +/// Build the list of secret detection patterns. +/// +/// Each pattern is a regex that matches a specific type of secret. 
+/// Patterns are ordered by severity (critical first) for consistency. +fn build_patterns() -> Vec { + vec![ + // ── Critical: cloud provider keys ────────────────────────────────── + SecretPattern { + name: "AWS Access Key ID", + regex: Regex::new(r"(?i)(AKIA[0-9A-Z]{16})").expect("valid regex"), + severity: Severity::Critical, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "AWS Secret Access Key", + regex: Regex::new(r#"(?i)aws_secret_access_key\s*[=:]\s*['"]?([A-Za-z0-9/+=]{40})['"]?"#) + .expect("valid regex"), + severity: Severity::Critical, + cwe_id: "CWE-798", + }, + // ── Critical: private keys ───────────────────────────────────────── + SecretPattern { + name: "Private Key", + regex: Regex::new(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----").expect("valid regex"), + severity: Severity::Critical, + cwe_id: "CWE-321", + }, + // ── High: API keys and tokens ────────────────────────────────────── + SecretPattern { + name: "GitHub Personal Access Token", + regex: Regex::new(r"(ghp_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})") + .expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "GitHub OAuth Access Token", + regex: Regex::new(r"gho_[a-zA-Z0-9]{36}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Anthropic API Key", + regex: Regex::new(r"sk-ant-[a-zA-Z0-9_-]{40,}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "OpenAI API Key", + regex: Regex::new(r"sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Stripe Secret Key", + regex: Regex::new(r"(?i)(sk_live_[a-zA-Z0-9]{24,}|sk_test_[a-zA-Z0-9]{24,})").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Stripe Webhook Secret", + regex: 
Regex::new(r"whsec_[a-zA-Z0-9]{24,}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Slack Webhook URL", + regex: Regex::new(r"https://hooks\.slack\.com/services/T[a-zA-Z0-9]{8,}/B[a-zA-Z0-9]{8,}/[a-zA-Z0-9]{24}") + .expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Slack Bot Token", + regex: Regex::new(r"xoxb-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Google API Key", + regex: Regex::new(r"AIza[0-9A-Za-z_-]{35}").expect("valid regex"), + severity: Severity::High, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Twilio API Key", + regex: Regex::new(r"SK[a-f0-9]{32}").expect("valid regex"), + severity: Severity::Medium, + cwe_id: "CWE-798", + }, + // ── Medium: database and connection strings ──────────────────────── + SecretPattern { + name: "Database Connection String", + regex: Regex::new(r#"(?i)(postgres|mysql|mongodb(\+srv)?|redis)://[^\s'"]+:[^\s'"]+@[^\s'"]+"#) + .expect("valid regex"), + severity: Severity::Medium, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "JWT Token", + regex: Regex::new(r"eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}") + .expect("valid regex"), + severity: Severity::Medium, + cwe_id: "CWE-798", + }, + // ── Medium: generic patterns ─────────────────────────────────────── + SecretPattern { + name: "Generic API Key Assignment", + regex: Regex::new(r#"(?i)(api_key|apikey|api_secret|secret_key|access_token)\s*[=:]\s*['"][a-zA-Z0-9_\-/.]{16,}['"]"#) + .expect("valid regex"), + severity: Severity::Medium, + cwe_id: "CWE-798", + }, + SecretPattern { + name: "Generic Password Assignment", + regex: Regex::new(r#"(?i)(password|passwd|pwd)\s*[=:]\s*['"][^'"]{8,}['"]"#) + .expect("valid regex"), + severity: Severity::Medium, + cwe_id: "CWE-798", + }, + ] +} + +/// Run the secrets scanner against the code in 
`config`.
+///
+/// Each line is tested against the patterns from `build_patterns` in order and
+/// yields at most one finding: the first pattern that matches wins, so a single
+/// line is never reported twice. Comment lines are scanned like any other line,
+/// because a secret in a comment is still an exposed secret.
+///
+/// # Errors
+/// Never fails at present; the `Result` keeps the signature in line with the
+/// other engines' `run` functions.
+pub async fn run(config: &ScanConfig) -> Result<Vec<RawFinding>> {
+    // Compiling the regexes is the expensive part, so do it once per process
+    // rather than once per scan.
+    static PATTERNS: std::sync::OnceLock<Vec<SecretPattern>> = std::sync::OnceLock::new();
+    let patterns = PATTERNS.get_or_init(build_patterns);
+
+    let mut findings = Vec::new();
+
+    for (index, line) in config.code.lines().enumerate() {
+        // `find` both tests and locates the match, so each regex runs only once.
+        // Only report the first pattern match per line to avoid duplicates.
+        let first_hit = patterns
+            .iter()
+            .find_map(|pattern| pattern.regex.find(line).map(|m| (pattern, m)));
+
+        if let Some((pattern, matched)) = first_hit {
+            // Line numbers are 1-based; saturate rather than wrap on overflow.
+            let line_number = u32::try_from(index + 1).unwrap_or(u32::MAX);
+
+            // NOTE(review): `vulnerable_code` keeps the full line, secret included,
+            // and the AI prompts embed that field. Confirm that sending it to a
+            // third-party provider is acceptable, or store a redacted line instead.
+            findings.push(RawFinding {
+                engine: Engine::Secrets,
+                cve_id: None,
+                cwe_id: Some(pattern.cwe_id.to_string()),
+                severity: pattern.severity.clone(),
+                title: format!("Hardcoded {} detected", pattern.name),
+                vulnerable_code: line.to_string(),
+                line_start: line_number,
+                line_end: line_number,
+                file_path: config.file_path.clone(),
+            });
+
+            // Never log the raw secret, only its redacted form.
+            tracing::debug!(
+                "Secret found: {} at line {} (redacted: {})",
+                pattern.name,
+                line_number,
+                redact_secret(matched.as_str())
+            );
+        }
+    }
+
+    Ok(findings)
+}
+
+/// Redact a secret value, showing only the first 4 and last 4 characters.
+///
+/// Secrets of 12 characters or fewer are masked completely. Lengths are counted
+/// in `char`s rather than bytes, so values containing multi-byte UTF-8 characters
+/// are redacted instead of panicking on a slice that splits a character.
+fn redact_secret(secret: &str) -> String {
+    let chars: Vec<char> = secret.chars().collect();
+    let total = chars.len();
+
+    // Too short to reveal anything safely: mask every character.
+    if total <= 12 {
+        return "*".repeat(total);
+    }
+
+    let prefix: String = chars[..4].iter().collect();
+    let suffix: String = chars[total - 4..].iter().collect();
+    format!("{prefix}{}…{suffix}", "*".repeat(total - 8))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::language::Language;
+
+    async fn scan_code(code: &str) -> Vec<RawFinding> {
+        let config = ScanConfig {
+            code: code.to_string(),
+            language: Language::Unknown,
+            engines: vec![Engine::Secrets],
+            ai_config: None,
+            file_path: None,
+        };
+        run(&config).await.expect("scan should succeed")
+    }
+
+    #[tokio::test]
+    async fn detects_aws_access_key() {
+        let findings = scan_code("const key = \"AKIAIOSFODNN7EXAMPLE\";").await;
+        assert_eq!(findings.len(), 1);
+        assert!(findings[0].title.contains("AWS Access Key"));
+        assert_eq!(findings[0].severity, Severity::Critical);
+    }
+
+    #[tokio::test]
+    async fn detects_github_pat() {
+        let findings = scan_code("token = \"ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij\"").await;
+        assert_eq!(findings.len(), 1);
+        assert!(findings[0].title.contains("GitHub Personal Access Token"));
+    }
+
+    #[tokio::test]
+    async fn detects_stripe_key() {
+        let findings = scan_code("STRIPE_KEY = \"SK_LIVE_000000000000000000000000\"").await;
+        assert_eq!(findings.len(), 1);
+        assert!(findings[0].title.contains("Stripe Secret Key"));
+    }
+
+    #[tokio::test]
+    async fn detects_private_key() {
+        let findings = scan_code("-----BEGIN RSA PRIVATE KEY-----").await;
+        assert_eq!(findings.len(), 1);
+        assert!(findings[0].title.contains("Private Key"));
+        assert_eq!(findings[0].severity, Severity::Critical);
+    }
+
+    #[tokio::test]
+    async fn detects_generic_password() {
+        let findings = scan_code(r#"password = "my_super_secret_password""#).await;
+        assert_eq!(findings.len(), 1);
+        assert!(findings[0].title.contains("Password"));
+    }
+
+    #[test]
+    fn redact_handles_multibyte_characters() {
+        // Byte offset 4 falls inside `é`; byte-based slicing would panic here.
+        assert_eq!(
+            redact_secret("abcédefghijklmnop"),
+            format!("abcé{}…mnop", "*".repeat(9))
+        );
+    }
+
+    #[tokio::test]
+    async fn detects_database_url() {
+        let findings =
scan_code("DATABASE_URL=postgres://admin:pass123@localhost:5432/mydb").await; + assert_eq!(findings.len(), 1); + assert!(findings[0].title.contains("Database Connection String")); + } + + #[tokio::test] + async fn clean_code_produces_no_findings() { + let findings = scan_code( + "fn main() {\n println!(\"Hello, world!\");\n}", + ) + .await; + assert!(findings.is_empty()); + } + + #[test] + fn redact_works() { + assert_eq!(redact_secret("AKIAIOSFODNN7EXAMPLE"), "AKIA************…MPLE"); + assert_eq!(redact_secret("short"), "*****"); + } } diff --git a/crates/scanner/src/finding.rs b/crates/scanner/src/finding.rs index 8c76f00..1e4becc 100644 --- a/crates/scanner/src/finding.rs +++ b/crates/scanner/src/finding.rs @@ -4,7 +4,55 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -/// A single security finding from a scan. +/// A raw finding from a scan engine, before AI enrichment. +/// +/// This is what engines produce. It gets converted to a [`Finding`] +/// after the AI provider generates an explanation and fix. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RawFinding { + pub engine: crate::Engine, + + /// CVE identifier if one exists (e.g. "CVE-2025-12345"). + pub cve_id: Option, + + /// CWE identifier (e.g. "CWE-89" for SQL injection). + pub cwe_id: Option, + + pub severity: Severity, + + /// Short title of the vulnerability. + pub title: String, + + /// The vulnerable code snippet. + pub vulnerable_code: String, + + pub line_start: u32, + pub line_end: u32, + pub file_path: Option, +} + +impl RawFinding { + /// Convert a raw finding into a full [`Finding`] with AI-generated content. 
+ pub fn into_finding(self, explanation: String, fixed_code: String) -> Finding { + Finding { + id: Uuid::new_v4(), + engine: self.engine, + cve_id: self.cve_id, + cwe_id: self.cwe_id, + severity: self.severity, + title: self.title, + explanation, + vulnerable_code: self.vulnerable_code, + fixed_code, + line_start: self.line_start, + line_end: self.line_end, + file_path: self.file_path, + detected_at: Utc::now(), + } + } +} + +/// A fully enriched security finding from a scan. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Finding { pub id: Uuid, diff --git a/crates/scanner/src/lib.rs b/crates/scanner/src/lib.rs index 227ee98..0b22a65 100644 --- a/crates/scanner/src/lib.rs +++ b/crates/scanner/src/lib.rs @@ -1,42 +1,82 @@ //! Zenvra Scanner — core vulnerability detection engine. //! //! This crate exposes the primary scanning API used by the CLI, web API, -//! and VS Code extension. It orchestrates SAST, SCA, and secrets detection. +//! and VS Code extension. It orchestrates SAST, SCA, and secrets detection, +//! and supports multiple AI providers for generating explanations and fixes. +pub mod ai; pub mod engine; +pub mod engines; pub mod finding; pub mod language; -pub use finding::{Finding, Severity}; +pub use engine::Engine; +pub use finding::{Finding, RawFinding, Severity}; pub use language::Language; +use serde::{Deserialize, Serialize}; + +/// Configuration for a scan run. +/// +/// Holds the source code, detected language, which engines to run, +/// and optional AI provider config for generating explanations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ScanConfig { + /// The source code to scan. + pub code: String, + + /// Programming language of the code. + pub language: Language, + + /// Which scan engines to run (e.g. SAST, SCA, Secrets). + pub engines: Vec, + + /// Optional AI provider configuration for explanations and fixes. + pub ai_config: Option, + + /// Optional file path for context in findings. 
+ pub file_path: Option, +} + /// Run a full scan on the provided source code. /// /// # Arguments -/// * `code` - The source code to scan -/// * `language` - Programming language of the code -/// * `engines` - Which scan engines to run +/// * `config` - The scan configuration including code, language, and engines. /// /// # Returns /// A list of [`Finding`]s, sorted by severity (critical first). -pub async fn scan( - code: &str, - language: Language, - engines: &[Engine], -) -> anyhow::Result> { - engine::run(code, language, engines).await -} +pub async fn scan(config: &ScanConfig) -> anyhow::Result> { + let raw_findings = engine::run(config).await?; + + // If AI config is provided, enrich findings with explanations and fixes. + // Otherwise, return raw findings converted to Finding without AI enrichment. + let findings = if let Some(ai_config) = &config.ai_config { + let provider = ai::create_provider(ai_config)?; + let mut enriched = Vec::with_capacity(raw_findings.len()); + for raw in raw_findings { + let explanation = match provider.explain(&raw).await { + Ok(exp) => exp, + Err(e) => { + tracing::warn!("AI explain failed for {}: {}", raw.title, e); + String::from("AI explanation unavailable.") + } + }; + let fixed_code = match provider.generate_fix(&raw).await { + Ok(fix) => fix, + Err(e) => { + tracing::warn!("AI fix generation failed for {}: {}", raw.title, e); + String::new() + } + }; + enriched.push(raw.into_finding(explanation, fixed_code)); + } + enriched + } else { + raw_findings + .into_iter() + .map(|r| r.into_finding(String::new(), String::new())) + .collect() + }; -/// Scan engines available in Zenvra. -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum Engine { - /// Static Application Security Testing — analyses source code patterns. - Sast, - /// Software Composition Analysis — checks dependency vulnerabilities. 
- Sca, - /// Detects hardcoded secrets, API keys, and credentials. - Secrets, - /// Patterns specific to AI/vibe-generated code. - AiCode, + Ok(findings) } diff --git a/test-fixtures/README.md b/test-fixtures/README.md new file mode 100644 index 0000000..507acfc --- /dev/null +++ b/test-fixtures/README.md @@ -0,0 +1,5 @@ +# Test Fixtures + +These files contain intentional security vulnerabilities for testing the Zenvra scanner. + +**DO NOT use any of the secrets in these files — they are fake/expired test values.** diff --git a/test-fixtures/vulnerable_app.py b/test-fixtures/vulnerable_app.py new file mode 100644 index 0000000..13800a5 --- /dev/null +++ b/test-fixtures/vulnerable_app.py @@ -0,0 +1,50 @@ +# This file contains intentional hardcoded secrets for testing Zenvra's secrets scanner. +# None of these are real credentials. + +import os +import boto3 + +# Hardcoded AWS credentials (should be flagged) +AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE" +AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + +# Hardcoded database URL with credentials (should be flagged) +DATABASE_URL = "postgres://admin:supersecretpassword@prod-db.example.com:5432/mydb" + +# Hardcoded Stripe key (should be flagged) +STRIPE_KEY = "SK_LIVE_000000000000000000000000" + +# Hardcoded GitHub token (should be flagged) +GITHUB_TOKEN = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij" + +# Hardcoded generic password (should be flagged) +password = "my_super_secret_password_123" + +# This is fine — reading from env +SAFE_API_KEY = os.environ.get("API_KEY") + +# Private key (should be flagged) +PRIVATE_KEY = """ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA0Z3VR...fake...key...data +-----END RSA PRIVATE KEY----- +""" + +def connect_to_aws(): + """Connect to AWS using hardcoded credentials — BAD PRACTICE.""" + client = boto3.client( + "s3", + aws_access_key_id=AWS_ACCESS_KEY_ID, + aws_secret_access_key=AWS_SECRET_ACCESS_KEY, + ) + return client + + +def main(): + print("This is a test 
file with intentional vulnerabilities.") + client = connect_to_aws() + print(f"Connected: {client}") + + +if __name__ == "__main__": + main()