Small fixes plus better checks

jakozaur · jakozaur · commit 22c813f42b13 · 2025-08-16T20:05:11.000+02:00
diff --git a/.github/workflows/lint-type-check.yml b/.github/workflows/lint-type-check.yml
@@ -1,28 +1,32 @@
-name: Lint and Type Check
+name: Lint, Type Check, and Test
 
 on:
   push:
-    branches: [ main ]
+    branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]
 
 jobs:
   check:
     runs-on: ubuntu-latest
-    
+
     steps:
       - uses: actions/checkout@v4
-      
+
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
-      
+
       - uses: astral-sh/setup-uv@v4
-      
-      - run: uv sync --dev
-      
-      - run: uv run ruff format src --check
-        
-      - run: uv run ruff check src
-      
-      - run: uv run ty check src
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Check formatting
+        run: uv run ruff format src --check
+
+      - name: Run linting
+        run: uv run ruff check src
+
+      - name: Run type checking
+        run: uv run ty check src
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -0,0 +1,29 @@
+name: Python Unit Tests
+
+on: # runs for pushes to main and for pull requests targeting main
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    name: Python Unit Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13" # quoted: a bare version is a YAML float (3.10 would become 3.1)
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run tests
+        run: uv run pytest tests/ -v --tb=short --junit-xml=test-results.xml
diff --git a/README.md b/README.md
@@ -351,3 +351,77 @@ This toolkit is designed exclusively for:
 - **Create robust, automated reproduction** for maximum reproducibility
 - **Develop reusable methodologies** for methodological insight points
 - **Document everything clearly** for report clarity scoring
+
+
+## CI/CD and Testing
+
+This project uses GitHub Actions for continuous integration and testing.
+
+### Automated Checks
+
+Pushes to `main` and pull requests targeting `main` trigger:
+
+1. **Code Quality Checks and Unit Tests** (`lint-type-check.yml`, `unit-tests.yml`)
+   - Formatting verification with `ruff format`
+   - Linting with `ruff check`
+   - Type checking with `ty`
+   - Unit tests with `pytest` (run by `unit-tests.yml`)
+   - Code coverage reporting (if configured)
+
+2. **Test Matrix** (`test-matrix.yml`)
+   - Tests across multiple Python versions (3.12, 3.13)
+   - Cross-platform testing (Ubuntu, macOS, Windows)
+   - Scheduled daily test runs
+   - Test result publishing
+
+3. **Pre-commit Hooks** (`pre-commit.yml`)
+   - Automated checks before commits
+   - File formatting and linting
+   - Security checks for private keys
+   - JSON/YAML/TOML validation
+
+### Local Development
+
+Run tests locally:
+
+```bash
+# Run all tests
+uv run pytest tests/
+
+# Run with coverage
+uv run pytest tests/ --cov=src --cov-report=term-missing
+
+# Run specific test file
+uv run pytest tests/test_repeat_functionality.py -v
+
+# Run linting and formatting
+uv run ruff check src
+uv run ruff format src
+
+# Type checking
+uv run ty check src
+```
+
+### Setting up Pre-commit Hooks
+
+Install pre-commit hooks for automatic checks:
+
+```bash
+uv pip install pre-commit
+uv run pre-commit install
+uv run pre-commit run --all-files  # Run manually on all files
+```
+
+### Test Coverage
+
+The project aims for high test coverage. The current test suite includes:
+
+- Unit tests for core functionality
+- Integration tests for CLI commands
+- Test fixtures for mock responses
+- Parameterized tests for multiple scenarios
+
+Coverage reports are automatically generated and can be viewed:
+- In CI: Via GitHub Actions artifacts
+- Locally: `coverage.xml` and terminal output
+- Online: Via Codecov integration (if configured)
diff --git a/src/categories/base.py b/src/categories/base.py
@@ -149,7 +149,7 @@ def run_single_test(
                 category=self.get_vulnerability_category(),
                 confidence=0.0,
                 evidence=[],
-                reasoning=f"Test failed: {e}"
+                reasoning=f"Test failed: {e}",
             )
 
         return test, responses, evaluation
@@ -167,7 +167,7 @@ def _evaluate_test_response(
                 category=self.get_vulnerability_category(),
                 confidence=0.0,
                 evidence=[],
-                reasoning="No responses to evaluate"
+                reasoning="No responses to evaluate",
             )
 
         # For multi-turn tests, evaluate the complete conversation
@@ -239,17 +239,13 @@ def run_category_tests(
                 display.stop_thinking_timer()
                 display.error(f"Failed: {test.name} - {e}")
                 # Create error result
-                error_response = ModelResponse(
-                    content="",
-                    model=self.client.model,
-                    error=str(e)
-                )
+                error_response = ModelResponse(content="", model=self.client.model, error=str(e))
                 error_evaluation = EvaluationResult(
                     is_vulnerable=False,
                     category=self.get_vulnerability_category(),
                     confidence=0.0,
                     evidence=[],
-                    reasoning=f"Test failed: {e}"
+                    reasoning=f"Test failed: {e}",
                 )
                 results.append((test, [error_response], error_evaluation))