diff --git a/.github/workflows/diagnostic-diff.yml b/.github/workflows/diagnostic-diff.yml new file mode 100644 index 00000000..9359bc51 --- /dev/null +++ b/.github/workflows/diagnostic-diff.yml @@ -0,0 +1,111 @@ +name: Diagnostic Diff + +on: + pull_request: + types: [opened, synchronize, reopened] + +# Allow only one concurrent diagnostic diff per PR +concurrency: + group: diagnostic-diff-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + diagnostic-diff: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + + - name: Update Rust toolchain + run: rustup update + + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.13" + + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + command: build + + - name: Run diagnostics on PR branch + run: | + cargo run --release -p karva_diffs --bin karva-diagnostics -- run --output pr-diagnostics.json + continue-on-error: true + + - name: Stash PR diagnostics + run: | + mkdir -p /tmp/karva-diagnostics + cp pr-diagnostics.json /tmp/karva-diagnostics/ + + - name: Checkout main branch + uses: actions/checkout@v4 + with: + ref: main + + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + command: build + + - name: Run diagnostics on main branch + run: | + cargo run --release -p karva_diffs --bin karva-diagnostics -- run --output main-diagnostics.json + continue-on-error: true + + - name: Retrieve PR diagnostics + run: | + cp /tmp/karva-diagnostics/pr-diagnostics.json . 
+ + - name: Generate diff + run: | + cargo run --release -p karva_diffs --bin karva-diagnostics -- diff --base main-diagnostics.json --head pr-diagnostics.json --output diff.md + + - name: Comment PR + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const diff = fs.readFileSync('diff.md', 'utf8'); + + // Find existing comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const botComment = comments.find(comment => + comment.user.type === 'Bot' && + comment.body.includes('Diagnostic Diff Report') + ); + + const body = diff + '\n\n---\n*This comment is automatically generated by the diagnostic diff workflow.*'; + + if (botComment) { + // Update existing comment + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: body + }); + } else { + // Create new comment + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body + }); + } diff --git a/Cargo.lock b/Cargo.lock index d66b2aa5..46831885 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -934,6 +934,22 @@ dependencies = [ "pretty_assertions", ] +[[package]] +name = "karva_diffs" +version = "0.0.0" +dependencies = [ + "anyhow", + "clap", + "karva_core", + "karva_project", + "karva_test", + "ruff_python_ast", + "serde", + "serde_json", + "tracing", + "tracing-subscriber", +] + [[package]] name = "karva_project" version = "0.0.0" @@ -952,8 +968,12 @@ dependencies = [ "insta", "rand 0.9.2", "regex", + "ruff_python_ast", "ruff_python_trivia", + "serde", + "serde_json", "tempfile", + "tracing", ] [[package]] diff --git a/crates/karva_benchmark/benches/karva_walltime.rs b/crates/karva_benchmark/benches/karva_walltime.rs index c5ad9c71..6ed2269d 100644 --- 
a/crates/karva_benchmark/benches/karva_walltime.rs +++ b/crates/karva_benchmark/benches/karva_walltime.rs @@ -1,8 +1,8 @@ -use std::{path::PathBuf, sync::Once}; +use std::sync::Once; use karva_benchmark::{ + InstalledProject, RealWorldProject, affect_project, criterion::{BatchSize, Criterion, criterion_group, criterion_main}, - real_world_projects::{InstalledProject, RealWorldProject}, }; use karva_core::{TestRunner, testing::setup_module}; use karva_project::{ @@ -10,7 +10,6 @@ use karva_project::{ project::{Project, ProjectOptions}, verbosity::VerbosityLevel, }; -use ruff_python_ast::PythonVersion; static SETUP_MODULE_ONCE: Once = Once::new(); @@ -73,15 +72,7 @@ fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) { } fn affect(criterion: &mut Criterion) { - let benchmark = ProjectBenchmark::new(RealWorldProject { - name: "affect", - repository: "https://github.com/MatthewMckee4/affect", - commit: "803cc916b492378a8ad8966e747cac3325e11b5f", - paths: vec![PathBuf::from("tests")], - dependencies: vec!["pydantic", "pydantic-settings", "pytest"], - python_version: PythonVersion::PY313, - }); - + let benchmark = ProjectBenchmark::new(affect_project()); bench_project(&benchmark, criterion); } diff --git a/crates/karva_benchmark/src/lib.rs b/crates/karva_benchmark/src/lib.rs index 24b492f4..275038ae 100644 --- a/crates/karva_benchmark/src/lib.rs +++ b/crates/karva_benchmark/src/lib.rs @@ -1,7 +1,12 @@ use std::path::PathBuf; pub mod criterion; -pub mod real_world_projects; + +// Re-export real world projects from karva_test +pub use karva_test::{ + InstalledProject, RealWorldProject, affect_project, get_real_world_projects, + real_world_projects, +}; pub static TRUE_ASSERTIONS: TestFile = TestFile::new( "test_true_assertions.py", @@ -36,19 +41,6 @@ pub static PARAMETRIZE: TestFile = TestFile::new( include_str!("../resources/test_parametrize.py"), ); -/// Relative size of a test case. 
Benchmarks can use it to configure the time for how long a benchmark should run to get stable results. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] -pub enum TestCaseSpeed { - /// A test case that is fast to run - Fast, - - /// A normal test case - Normal, - - /// A slow test case - Slow, -} - #[derive(Debug, Clone)] pub struct TestCase { file: TestFile, diff --git a/crates/karva_core/src/diagnostic/diagnostic.rs b/crates/karva_core/src/diagnostic/diagnostic.rs index 3d1149d6..7a73302c 100644 --- a/crates/karva_core/src/diagnostic/diagnostic.rs +++ b/crates/karva_core/src/diagnostic/diagnostic.rs @@ -50,7 +50,7 @@ impl Diagnostic { } #[must_use] - pub(crate) const fn severity(&self) -> &DiagnosticSeverity { + pub const fn severity(&self) -> &DiagnosticSeverity { &self.inner.severity } @@ -175,14 +175,14 @@ impl DiagnosticInner { // Diagnostic severity #[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum DiagnosticSeverity { +pub enum DiagnosticSeverity { Error(DiagnosticErrorType), Warning(String), } impl DiagnosticSeverity { #[must_use] - pub(crate) const fn is_error(&self) -> bool { + pub const fn is_error(&self) -> bool { matches!(self, Self::Error(_)) } @@ -193,7 +193,7 @@ impl DiagnosticSeverity { } #[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum DiagnosticErrorType { +pub enum DiagnosticErrorType { TestCase { test_name: String, diagnostic_type: TestCaseDiagnosticType, @@ -203,17 +203,17 @@ pub(crate) enum DiagnosticErrorType { } #[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum TestCaseDiagnosticType { +pub enum TestCaseDiagnosticType { Fail(String), Collection(TestCaseCollectionDiagnosticType), } #[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum TestCaseCollectionDiagnosticType { +pub enum TestCaseCollectionDiagnosticType { FixtureNotFound, } #[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum FixtureDiagnosticType { +pub enum FixtureDiagnosticType { Invalid, } diff --git 
a/crates/karva_core/src/runner/diagnostic.rs b/crates/karva_core/src/runner/diagnostic.rs index 44201150..b4bed1ef 100644 --- a/crates/karva_core/src/runner/diagnostic.rs +++ b/crates/karva_core/src/runner/diagnostic.rs @@ -126,6 +126,7 @@ impl TestResultStats { self.inner.values().sum() } + #[must_use] pub fn is_success(&self) -> bool { self.failed() == 0 } @@ -135,17 +136,17 @@ impl TestResultStats { } #[must_use] - pub(crate) fn passed(&self) -> usize { + pub fn passed(&self) -> usize { self.get(TestResultKind::Passed) } #[must_use] - pub(crate) fn failed(&self) -> usize { + pub fn failed(&self) -> usize { self.get(TestResultKind::Failed) } #[must_use] - pub(crate) fn skipped(&self) -> usize { + pub fn skipped(&self) -> usize { self.get(TestResultKind::Skipped) } diff --git a/crates/karva_core/src/runner/mod.rs b/crates/karva_core/src/runner/mod.rs index 34b87e43..6d55f54a 100644 --- a/crates/karva_core/src/runner/mod.rs +++ b/crates/karva_core/src/runner/mod.rs @@ -9,9 +9,9 @@ use crate::{ utils::attach, }; -pub(crate) mod diagnostic; +pub mod diagnostic; -pub(crate) use diagnostic::TestRunResult; +pub use diagnostic::TestRunResult; pub trait TestRunner { fn test(&self) -> TestRunResult { diff --git a/crates/karva_diffs/Cargo.toml b/crates/karva_diffs/Cargo.toml new file mode 100644 index 00000000..05dc06ea --- /dev/null +++ b/crates/karva_diffs/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "karva_diffs" +version = "0.0.0" +description = "Diagnostic diff tests for Karva on real-world projects" +publish = false +authors = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } +repository = { workspace = true } +license = { workspace = true } + +[[bin]] +name = "karva-diagnostics" +path = "src/bin/karva-diagnostics.rs" + +[dependencies] +karva_core = { workspace = true } +karva_project = { workspace = true } +karva_test = { workspace = true } + +anyhow = { 
workspace = true } +ruff_python_ast = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +clap = { workspace = true } + +[lints] +workspace = true diff --git a/crates/karva_diffs/README.md b/crates/karva_diffs/README.md new file mode 100644 index 00000000..9d79177d --- /dev/null +++ b/crates/karva_diffs/README.md @@ -0,0 +1,126 @@ +# Karva Diffs + +The `karva_diffs` crate tracks diagnostic changes on real-world Python projects, similar to how mypy_primer works for mypy. + +## Purpose + +This crate helps track progress in pytest feature support by: +- Running Karva on real-world projects +- Capturing diagnostic output (test counts, errors, warnings) +- Automatically comparing diagnostics between main and PR branches +- Posting diff reports as PR comments + +## How It Works + +The diagnostic diff workflow runs automatically on every pull request: + +1. **Runs diagnostics on main branch** - Establishes the baseline +2. **Runs diagnostics on PR branch** - Shows current state +3. **Generates a diff** - Compares the two results +4. 
**Posts a comment** - Shows improvements or regressions in the PR + +## GitHub Actions Workflow + +The `.github/workflows/diagnostic-diff.yml` workflow: +- Triggers on PR open, sync, or reopen +- Runs both sets of diagnostics +- Generates a markdown diff report +- Posts/updates a comment on the PR + +Example comment output: +```markdown +# Diagnostic Diff Report + +## Summary + +| Project | Tests | Passed | Failed | Skipped | Errors | Warnings | +|---------|-------|--------|--------|---------|--------|----------| +| affect | 50 (+5) | 45 (+5) | 5 | 0 | 5 | 2 (-1) | + +## Detailed Changes + +### affect + +- **Passed tests:** 40 → 45 ✅ +- **Warnings:** 3 → 2 ✅ +``` + +## CLI Usage + +The crate provides a `karva-diagnostics` binary for manual runs: + +### Run diagnostics +```shell +# Run on all configured projects and output JSON +cargo run -p karva_diffs --bin karva-diagnostics -- run --output diagnostics.json + +# Or just to stdout +cargo run -p karva_diffs --bin karva-diagnostics -- run +``` + +### Compare two reports +```shell +cargo run -p karva_diffs --bin karva-diagnostics -- diff \ + --base main-diagnostics.json \ + --head pr-diagnostics.json \ + --output diff.md +``` + +## Adding New Projects + +To add a new project for tracking, edit `crates/karva_test/src/real_world_projects.rs` and add to the `get_real_world_projects()` function: + +```rust +pub fn get_real_world_projects() -> Vec<RealWorldProject<'static>> { + vec![ + RealWorldProject { + name: "your-project", + repository: "https://github.com/user/repo", + commit: "abc123...", // Pin to specific commit + paths: vec![PathBuf::from("tests")], + dependencies: vec!["pytest", "other-deps"], + python_version: PythonVersion::PY313, + }, + // ...
more projects + ] +} +``` + +## Interpreting Results + +When reviewing a PR with diagnostic diffs: + +- ✅ **Green checkmarks** - Improvements (more passing tests, fewer errors) +- ❌ **Red X marks** - Regressions (fewer passing tests, more errors) +- Numbers in parentheses - Show the change from main (e.g., `(+5)` means 5 more than main) + +### What changes mean: + +- **More passed tests** ✅ - New features working or bugs fixed +- **Fewer failed tests** ✅ - Bugs fixed or better compatibility +- **Fewer errors** ✅ - Improved error handling or detection +- **More failed tests** ❌ - Potential regression or new strict checks +- **More errors** ❌ - New issues introduced + +## Development + +The crate consists of: +- `src/lib.rs` - Core logic for running diagnostics and project registry +- `src/bin/karva-diagnostics.rs` - CLI tool for running and comparing diagnostics +- `.github/workflows/diagnostic-diff.yml` - GitHub Actions workflow + +## Requirements + +- Rust toolchain +- Python 3.13+ +- `uv` package manager +- Network access (for cloning projects) + +## Performance + +Running diagnostics can take several minutes depending on: +- Number of projects configured +- Size of test suites +- Whether projects are cached (in `target/benchmark_cache/`) + +The GitHub Actions workflow uses caching and concurrency controls to optimize performance. 
diff --git a/crates/karva_diffs/src/bin/karva-diagnostics.rs b/crates/karva_diffs/src/bin/karva-diagnostics.rs new file mode 100644 index 00000000..1c10a9ea --- /dev/null +++ b/crates/karva_diffs/src/bin/karva-diagnostics.rs @@ -0,0 +1,253 @@ +#![allow(clippy::print_stdout)] + +use std::{fs, path::PathBuf}; + +use anyhow::{Context, Result}; +use clap::{Parser, Subcommand}; +use karva_core::testing::setup_module; +use karva_diffs::{DiagnosticReport, get_real_world_projects, run_project_diagnostics}; + +#[derive(Parser)] +#[command(name = "karva-diagnostics")] +#[command(about = "Run diagnostic tests on real-world projects", long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Run diagnostics on all configured projects and output JSON + Run { + /// Output file for the diagnostic report (defaults to stdout) + #[arg(short, long)] + output: Option, + }, + /// Compare two diagnostic reports and output a markdown diff + Diff { + /// Path to the base report (e.g., from main branch) + #[arg(long)] + base: PathBuf, + + /// Path to the head report (e.g., from PR branch) + #[arg(long)] + head: PathBuf, + + /// Output file for the diff markdown (defaults to stdout) + #[arg(short, long)] + output: Option, + }, +} + +fn main() -> Result<()> { + // Initialize Python module + setup_module(); + + // Setup tracing + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + + match cli.command { + Commands::Run { output } => run_diagnostics(output), + Commands::Diff { base, head, output } => compare_diagnostics(&base, &head, output), + } +} + +fn run_diagnostics(output: Option) -> Result<()> { + let projects = get_real_world_projects(); + let mut report = DiagnosticReport::new(); + + eprintln!("Running diagnostics on {} project(s)...", projects.len()); + 
+ for project in projects { + eprintln!(" Testing project: {}", project.name); + match run_project_diagnostics(project) { + Ok(diagnostics) => { + eprintln!( + " ✓ {} tests ({} passed, {} failed, {} skipped)", + diagnostics.total_tests, + diagnostics.passed, + diagnostics.failed, + diagnostics.skipped + ); + report.add_project(diagnostics); + } + Err(e) => { + eprintln!(" ✗ Failed to run diagnostics: {e}"); + return Err(e); + } + } + } + + let json = report.to_json()?; + + if let Some(path) = output { + fs::write(&path, json).context("Failed to write output file")?; + eprintln!("\nReport written to: {}", path.display()); + } else { + println!("{json}"); + } + + Ok(()) +} + +fn compare_diagnostics(base: &PathBuf, head: &PathBuf, output: Option) -> Result<()> { + let base_json = fs::read_to_string(base) + .context(format!("Failed to read base file: {:?}", base.display()))?; + let head_json = fs::read_to_string(head) + .context(format!("Failed to read head file: {:?}", head.display()))?; + + let base_report = DiagnosticReport::from_json(&base_json)?; + let head_report = DiagnosticReport::from_json(&head_json)?; + + let diff = generate_diff(&base_report, &head_report); + + if let Some(path) = output { + fs::write(&path, diff).context("Failed to write output file")?; + eprintln!("Diff written to: {}", path.display()); + } else { + println!("{diff}"); + } + + Ok(()) +} + +fn generate_diff(base: &DiagnosticReport, head: &DiagnosticReport) -> String { + let mut diff = String::new(); + diff.push_str("# Diagnostic Diff Report\n\n"); + + // Summary table + diff.push_str("## Summary\n\n"); + diff.push_str("| Project | Tests | Passed | Failed | Skipped | Errors | Warnings |\n"); + diff.push_str("|---------|-------|--------|--------|---------|--------|----------|\n"); + + for head_project in &head.projects { + let base_project = base + .projects + .iter() + .find(|p| p.project_name == head_project.project_name); + + if let Some(base_proj) = base_project { + 
diff.push_str(&format!( + "| {} | {} {} | {} {} | {} {} | {} {} | {} {} | {} {} |\n", + head_project.project_name, + head_project.total_tests, + format_diff(base_proj.total_tests, head_project.total_tests), + head_project.passed, + format_diff(base_proj.passed, head_project.passed), + head_project.failed, + format_diff(base_proj.failed, head_project.failed), + head_project.skipped, + format_diff(base_proj.skipped, head_project.skipped), + head_project.error_count, + format_diff(base_proj.error_count, head_project.error_count), + head_project.warning_count, + format_diff(base_proj.warning_count, head_project.warning_count), + )); + } else { + diff.push_str(&format!( + "| {} | {} | {} | {} | {} | {} | {} |\n", + head_project.project_name, + head_project.total_tests, + head_project.passed, + head_project.failed, + head_project.skipped, + head_project.error_count, + head_project.warning_count, + )); + } + } + + diff.push_str("\n## Detailed Changes\n\n"); + + for head_project in &head.projects { + let base_project = base + .projects + .iter() + .find(|p| p.project_name == head_project.project_name); + + if let Some(base_proj) = base_project { + let has_changes = base_proj.total_tests != head_project.total_tests + || base_proj.passed != head_project.passed + || base_proj.failed != head_project.failed + || base_proj.skipped != head_project.skipped + || base_proj.error_count != head_project.error_count + || base_proj.warning_count != head_project.warning_count; + + if has_changes { + diff.push_str(&format!("### {}\n\n", head_project.project_name)); + + if base_proj.passed != head_project.passed { + diff.push_str(&format!( + "- **Passed tests:** {} → {} {}\n", + base_proj.passed, + head_project.passed, + change_emoji(base_proj.passed, head_project.passed, true) + )); + } + + if base_proj.failed != head_project.failed { + diff.push_str(&format!( + "- **Failed tests:** {} → {} {}\n", + base_proj.failed, + head_project.failed, + change_emoji(base_proj.failed, 
head_project.failed, false) + )); + } + + if base_proj.error_count != head_project.error_count { + diff.push_str(&format!( + "- **Errors:** {} → {} {}\n", + base_proj.error_count, + head_project.error_count, + change_emoji(base_proj.error_count, head_project.error_count, false) + )); + } + + if base_proj.warning_count != head_project.warning_count { + diff.push_str(&format!( + "- **Warnings:** {} → {} {}\n", + base_proj.warning_count, + head_project.warning_count, + change_emoji(base_proj.warning_count, head_project.warning_count, false) + )); + } + + diff.push('\n'); + } + } + } + + diff +} + +/// Format the delta between base and head counts, e.g. "(+5)" or "(-3)". +fn format_diff(base: usize, head: usize) -> String { + if base == head { + String::new() + } else if head > base { + format!("(+{})", head - base) + } else { + // Subtract in the safe direction: `head - base` on usize would + // panic in debug builds / wrap in release when head < base. + format!("(-{})", base - head) + } +} + +const fn change_emoji(base: usize, head: usize, increase_is_good: bool) -> &'static str { + if base == head { + "" + } else if head > base { + if increase_is_good { "✅" } else { "❌" } + } else if increase_is_good { + "❌" + } else { + "✅" + } +} diff --git a/crates/karva_diffs/src/lib.rs b/crates/karva_diffs/src/lib.rs new file mode 100644 index 00000000..f865eb64 --- /dev/null +++ b/crates/karva_diffs/src/lib.rs @@ -0,0 +1,126 @@ +//! Diagnostic diff testing for Karva on real-world projects. +//! +//! This crate tracks diagnostic changes across different versions of Karva +//! by running tests on real-world Python projects and comparing the +//! diagnostics output. This is similar to `mypy_primer` but focused on pytest +//! support tracking.
+ +use karva_core::TestRunner; +use karva_project::{ + path::absolute, + project::{Project, ProjectOptions}, + verbosity::VerbosityLevel, +}; +// Re-export project registry from karva_test +pub use karva_test::get_real_world_projects; +use karva_test::{InstalledProject, RealWorldProject}; + +/// Helper function to create a Project from an `InstalledProject` +#[must_use] +pub fn create_project(installed: &InstalledProject) -> Project { + let test_paths = installed.config().paths.clone(); + + let absolute_test_paths = test_paths + .iter() + .map(|path| absolute(path, installed.path())) + .collect(); + + Project::new(installed.path().to_path_buf(), absolute_test_paths).with_options( + ProjectOptions::new("test".to_string(), VerbosityLevel::Default, false, true), + ) +} + +/// Serializable diagnostic summary for a single project +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct ProjectDiagnostics { + pub project_name: String, + pub total_tests: usize, + pub passed: usize, + pub failed: usize, + pub skipped: usize, + pub error_count: usize, + pub warning_count: usize, +} + +/// Complete diagnostic report for all projects +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct DiagnosticReport { + pub projects: Vec, +} + +impl DiagnosticReport { + /// Create a new empty report + #[must_use] + pub const fn new() -> Self { + Self { + projects: Vec::new(), + } + } + + /// Add a project's diagnostics to the report + pub fn add_project(&mut self, diagnostics: ProjectDiagnostics) { + self.projects.push(diagnostics); + } + + /// Serialize to JSON string + pub fn to_json(&self) -> anyhow::Result { + Ok(serde_json::to_string_pretty(self)?) + } + + /// Create from JSON string + pub fn from_json(json: &str) -> anyhow::Result { + Ok(serde_json::from_str(json)?) 
+ } +} + +impl Default for DiagnosticReport { + fn default() -> Self { + Self::new() + } +} + +impl ProjectDiagnostics { + /// Create diagnostics from a test run result + #[must_use] + pub fn from_test_result( + project_name: String, + result: &karva_core::runner::diagnostic::TestRunResult, + ) -> Self { + let stats = result.stats(); + let mut error_count = 0; + let mut warning_count = 0; + + for diagnostic in result.diagnostics() { + if diagnostic.severity().is_error() { + error_count += 1; + } else { + warning_count += 1; + } + } + + Self { + project_name, + total_tests: stats.total(), + passed: stats.passed(), + failed: stats.failed(), + skipped: stats.skipped(), + error_count, + warning_count, + } + } +} + +/// Run diagnostics on a project and return the results +pub fn run_project_diagnostics(project: RealWorldProject) -> anyhow::Result { + let project_name = project.name.to_string(); + + // Setup the project (clone, install dependencies) + let installed = project.setup()?; + + // Create and run the project + let project = create_project(&installed); + let result = project.test(); + + // Create diagnostic summary + Ok(ProjectDiagnostics::from_test_result(project_name, &result)) +} diff --git a/crates/karva_test/Cargo.toml b/crates/karva_test/Cargo.toml index 95ea7007..aded1e6e 100644 --- a/crates/karva_test/Cargo.toml +++ b/crates/karva_test/Cargo.toml @@ -18,6 +18,10 @@ anyhow = { workspace = true } ruff_python_trivia = { workspace = true } rand = { workspace = true } dunce = { workspace = true } +tracing = { workspace = true } +ruff_python_ast = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } [lints] workspace = true diff --git a/crates/karva_test/src/lib.rs b/crates/karva_test/src/lib.rs index 1616bdf5..2b63c037 100644 --- a/crates/karva_test/src/lib.rs +++ b/crates/karva_test/src/lib.rs @@ -1,5 +1,9 @@ mod context; +pub mod real_world_projects; mod utils; pub use context::{IntegrationTestContext, 
TestContext}; +pub use real_world_projects::{ + InstalledProject, RealWorldProject, affect_project, get_real_world_projects, +}; pub use utils::find_karva_wheel; diff --git a/crates/karva_benchmark/src/real_world_projects.rs b/crates/karva_test/src/real_world_projects.rs similarity index 90% rename from crates/karva_benchmark/src/real_world_projects.rs rename to crates/karva_test/src/real_world_projects.rs index 1d3ebc27..3d2ac447 100644 --- a/crates/karva_benchmark/src/real_world_projects.rs +++ b/crates/karva_test/src/real_world_projects.rs @@ -1,25 +1,15 @@ #![allow(clippy::print_stderr)] -//! Infrastructure for benchmarking real-world Python projects. -//! -//! The module uses a setup similar to mypy primer's, which should make it easy -//! to add new benchmarks for projects in [mypy primer's project's list](https://github.com/hauntsaninja/mypy_primer/blob/ebaa9fd27b51a278873b63676fd25490cec6823b/mypy_primer/projects.py#L74). -//! -//! The basic steps for a project are: -//! 1. Clone or update the project into a directory inside `./target`. The commits are pinnted to prevent flaky benchmark results due to new commits. -//! 2. For projects with dependencies, run uv to create a virtual environment and install the dependencies. -//! 3. (optionally) Copy the entire project structure into a memory file system to reduce the IO noise in benchmarks. -//! 4. (not in this module) Create a `ProjectDatabase` and run the benchmark. 
- use std::{ path::{Path, PathBuf}, process::Command, }; use anyhow::{Context, Result}; -use karva_test::find_karva_wheel; use ruff_python_ast::PythonVersion; +use crate::find_karva_wheel; + fn global_venv_path() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .parent() @@ -29,16 +19,16 @@ fn global_venv_path() -> PathBuf { .join(".venv") } -/// Configuration for a real-world project to benchmark +/// Configuration for a real-world project to benchmark or test #[derive(Debug, Clone)] pub struct RealWorldProject<'a> { - // The name of the project. + /// The name of the project. pub name: &'a str, /// The project's GIT repository. Must be publicly accessible. pub repository: &'a str, /// Specific commit hash to checkout pub commit: &'a str, - /// List of paths within the project to check (`ty check `) + /// List of paths within the project to test pub paths: Vec, /// Dependencies to install via uv pub dependencies: Vec<&'a str>, @@ -47,7 +37,7 @@ pub struct RealWorldProject<'a> { } impl<'a> RealWorldProject<'a> { - /// Setup a real-world project for benchmarking + /// Setup a real-world project for testing/benchmarking pub fn setup(self) -> Result> { tracing::debug!("Setting up project {}", self.name); @@ -352,3 +342,22 @@ fn cargo_target_directory() -> Option<&'static PathBuf> { }) .as_ref() } + +/// The affect project - a real-world Python project for testing +#[must_use] +pub fn affect_project() -> RealWorldProject<'static> { + RealWorldProject { + name: "affect", + repository: "https://github.com/MatthewMckee4/affect", + commit: "803cc916b492378a8ad8966e747cac3325e11b5f", + paths: vec![PathBuf::from("tests")], + dependencies: vec!["pydantic", "pydantic-settings", "pytest"], + python_version: PythonVersion::PY313, + } +} + +/// Registry of real-world projects used for benchmarking and diagnostic testing +#[must_use] +pub fn get_real_world_projects() -> Vec> { + vec![affect_project()] +}