Merge pull request #52 from frankbria/feature/issue-10-cli-parsing-tests

frankbria · web-flow · commit f286710ee4ef · 2026-01-08T23:06:01.000-07:00
test(cli): add comprehensive CLI argument parsing tests
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -6,6 +6,12 @@ on:
   pull_request:
     branches: [ main ]
 
+env:
+  # Coverage threshold - configurable, not hardcoded
+  # Set to 0 to disable threshold enforcement
+  COVERAGE_THRESHOLD: 70
+  KCOV_VERSION: "42"
+
 jobs:
   test:
     runs-on: ubuntu-latest
@@ -37,3 +43,143 @@ jobs:
       run: |
         echo "## Test Results" >> $GITHUB_STEP_SUMMARY
         echo "✅ Unit tests passed" >> $GITHUB_STEP_SUMMARY
+
+  coverage:
+    runs-on: ubuntu-latest
+    needs: test
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Setup Node.js
+      uses: actions/setup-node@v3
+      with:
+        node-version: '18'
+
+    - name: Install dependencies
+      run: |
+        npm install
+        sudo apt-get update
+        sudo apt-get install -y jq
+
+    - name: Build and install kcov from source
+      run: |
+        # Install kcov build dependencies
+        sudo apt-get install -y \
+          cmake \
+          g++ \
+          binutils-dev \
+          libcurl4-openssl-dev \
+          libdw-dev \
+          libiberty-dev \
+          zlib1g-dev \
+          libssl-dev
+
+        # Clone and build kcov
+        git clone --depth 1 --branch v${KCOV_VERSION} https://github.com/SimonKagstrom/kcov.git /tmp/kcov-src
+        cd /tmp/kcov-src
+        mkdir build && cd build
+        cmake -DCMAKE_INSTALL_PREFIX=/usr/local ..
+        make -j$(nproc)
+        sudo make install
+
+        # Verify installation
+        /usr/local/bin/kcov --version
+
+    - name: Verify kcov installation
+      run: |
+        which kcov
+        kcov --version
+
+    - name: Run tests with coverage
+      run: |
+        mkdir -p coverage
+
+        # Run CLI parsing tests under kcov
+        kcov --include-path="$(pwd)/ralph_loop.sh,$(pwd)/lib" \
+             --exclude-pattern=tests/,node_modules/ \
+             coverage/cli-parsing \
+             bash -c "bats tests/unit/test_cli_parsing.bats" || true
+
+        # Run all unit tests under kcov for comprehensive coverage
+        kcov --include-path="$(pwd)/ralph_loop.sh,$(pwd)/lib" \
+             --exclude-pattern=tests/,node_modules/ \
+             coverage/all-unit \
+             bash -c "bats tests/unit/" || true
+
+    - name: Parse coverage results
+      id: coverage
+      run: |
+        # Extract coverage percentage from kcov JSON output
+        COVERAGE_FILE="coverage/all-unit/kcov-merged/coverage.json"
+
+        if [[ -f "$COVERAGE_FILE" ]]; then
+          COVERAGE_PCT=$(jq -r '.percent_covered // "0"' "$COVERAGE_FILE" | cut -d'.' -f1)
+          echo "coverage_percent=$COVERAGE_PCT" >> $GITHUB_OUTPUT
+          echo "Coverage: ${COVERAGE_PCT}%"
+        else
+          # Fallback: try to find any coverage.json
+          COVERAGE_FILE=$(find coverage -name "coverage.json" -type f 2>/dev/null | head -1)
+          if [[ -n "$COVERAGE_FILE" && -f "$COVERAGE_FILE" ]]; then
+            COVERAGE_PCT=$(jq -r '.percent_covered // "0"' "$COVERAGE_FILE" | cut -d'.' -f1)
+            echo "coverage_percent=$COVERAGE_PCT" >> $GITHUB_OUTPUT
+            echo "Coverage (from $COVERAGE_FILE): ${COVERAGE_PCT}%"
+          else
+            echo "coverage_percent=0" >> $GITHUB_OUTPUT
+            echo "Warning: Could not find coverage results"
+            # List what we do have for debugging
+            find coverage -type f -name "*.json" 2>/dev/null || echo "No JSON files found"
+            ls -laR coverage/ 2>/dev/null || echo "Coverage directory empty or not found"
+          fi
+        fi
+
+    - name: Check coverage threshold
+      run: |
+        COVERAGE=${{ steps.coverage.outputs.coverage_percent }}
+        THRESHOLD=${{ env.COVERAGE_THRESHOLD }}
+
+        echo "## Coverage Report" >> $GITHUB_STEP_SUMMARY
+        echo "" >> $GITHUB_STEP_SUMMARY
+        echo "| Metric | Value |" >> $GITHUB_STEP_SUMMARY
+        echo "|--------|-------|" >> $GITHUB_STEP_SUMMARY
+        echo "| Coverage | ${COVERAGE}% |" >> $GITHUB_STEP_SUMMARY
+        echo "| Threshold | ${THRESHOLD}% |" >> $GITHUB_STEP_SUMMARY
+        echo "" >> $GITHUB_STEP_SUMMARY
+
+        if [[ "$THRESHOLD" -eq 0 ]]; then
+          echo "✅ Coverage threshold enforcement disabled" >> $GITHUB_STEP_SUMMARY
+          echo "Coverage threshold enforcement disabled (COVERAGE_THRESHOLD=0)"
+          exit 0
+        fi
+
+        if [[ -z "$COVERAGE" || "$COVERAGE" == "0" ]]; then
+          echo "⚠️ Coverage measurement failed - skipping threshold check" >> $GITHUB_STEP_SUMMARY
+          echo "Coverage measurement failed - skipping threshold check"
+          exit 0
+        fi
+
+        if [[ "$COVERAGE" -lt "$THRESHOLD" ]]; then
+          echo "❌ Coverage ${COVERAGE}% is below threshold ${THRESHOLD}%" >> $GITHUB_STEP_SUMMARY
+          echo "::error::Coverage ${COVERAGE}% is below threshold ${THRESHOLD}%"
+          exit 1
+        else
+          echo "✅ Coverage ${COVERAGE}% meets threshold ${THRESHOLD}%" >> $GITHUB_STEP_SUMMARY
+          echo "Coverage ${COVERAGE}% meets threshold ${THRESHOLD}%"
+        fi
+
+    - name: Upload coverage artifacts
+      uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        name: coverage-report
+        path: coverage/
+        retention-days: 7
+
+    - name: Upload coverage to Codecov (optional)
+      uses: codecov/codecov-action@v4
+      if: always()
+      continue-on-error: true
+      with:
+        directory: coverage/all-unit
+        fail_ci_if_error: false
+        verbose: true
diff --git a/docs/code-review/2026-01-08-cli-parsing-tests-review.md b/docs/code-review/2026-01-08-cli-parsing-tests-review.md
@@ -0,0 +1,195 @@
+# Code Review Report: CLI Parsing Tests
+
+**Date:** 2026-01-08
+**Reviewer:** Code Review Agent
+**Component:** CLI Argument Parsing Unit Tests
+**Files Reviewed:** `tests/unit/test_cli_parsing.bats`
+**Ready for Production:** Yes
+
+## Executive Summary
+
+The CLI parsing test file is well-structured and provides comprehensive coverage of all 12 CLI flags in `ralph_loop.sh`. The tests follow BATS best practices with proper isolation, setup/teardown, and clear organization. One minor enhancement opportunity identified.
+
+**Critical Issues:** 0
+**Major Issues:** 0
+**Minor Issues:** 1
+**Positive Findings:** 6
+
+---
+
+## Review Context
+
+**Code Type:** Test Infrastructure (BATS unit tests)
+**Risk Level:** Low
+**Business Constraints:** Test reliability and maintainability
+
+### Review Focus Areas
+
+The review focused on the following areas based on context analysis:
+- ✅ Test Quality and Coverage - Primary concern for test code
+- ✅ Test Isolation and Cleanup - Prevent flaky tests
+- ✅ Resource Management - Temp directory handling
+- ✅ Code Maintainability - Long-term test maintenance
+- ❌ OWASP Web Security - Not applicable to test infrastructure
+- ❌ OWASP LLM/ML Security - Not applicable
+
+---
+
+## Priority 1 Issues - Critical
+
+**None identified.**
+
+---
+
+## Priority 2 Issues - Major
+
+**None identified.**
+
+---
+
+## Priority 3 Issues - Minor
+
+### Missing dedicated test for `--allowed-tools` validation
+
+**Location:** `tests/unit/test_cli_parsing.bats`
+**Severity:** Minor
+**Category:** Test Coverage
+
+**Problem:**
+The `--allowed-tools` flag is tested in the "All flags combined" test (line 276) but lacks a dedicated test for its validation behavior. The implementation in `ralph_loop.sh:976-981` calls `validate_allowed_tools()` which should be tested independently.
+
+**Recommendation:**
+Add a dedicated test for `--allowed-tools` validation to match the pattern used for other validated flags like `--timeout` and `--output-format`.
+
+**Suggested Approach:**
+```bash
+@test "--allowed-tools flag accepts valid tool list" {
+    run bash "$RALPH_SCRIPT" --allowed-tools "Write,Read,Bash" --help
+
+    assert_success
+    [[ "$output" == *"Usage:"* ]]
+}
+```
+
+**Note:** This is low priority since the flag is covered in combination tests and the validation function may have its own tests elsewhere.
+
+---
+
+## Positive Findings
+
+### Excellent Practices
+
+- **Comprehensive Flag Coverage:** All 12 CLI flags are tested including both long and short forms
+- **Boundary Testing:** The `--timeout` test validates edge cases (0, 1, 120, 121, -5, "abc")
+- **Clear Organization:** Well-structured sections with descriptive headers make tests easy to navigate
+- **Early Exit Pattern:** Clever use of `--help` as escape hatch to test flag parsing without triggering main loop
+
+### Good Architectural Decisions
+
+- **Test Isolation:** Each test creates its own temp directory with proper cleanup in teardown
+- **Minimal Stubs:** Only creates stub libraries actually needed by CLI parsing, not the entire system
+- **Git Initialization:** Proper setup of git repo required by some flags
+
+### Testing Wins
+
+- **Short Flag Equivalence:** Bonus tests verify `-c`, `-p`, `-s`, `-m`, `-v`, `-t` work identically to long forms
+- **Multiple Flag Combinations:** Tests verify flags work together and are order-independent
+- **Error Message Validation:** Tests check for specific error messages, not just failure status
+
+---
+
+## Team Collaboration Needed
+
+### Handoffs to Other Agents
+
+**Architecture Agent:**
+- No issues identified
+
+**UX Designer Agent:**
+- Not applicable for CLI tests
+
+**DevOps Agent:**
+- Tests integrate well with existing CI/CD via `bats tests/unit/`
+
+---
+
+## Testing Recommendations
+
+### Unit Tests Needed
+- [x] Help flag tests (2) - Implemented
+- [x] Flag value setting tests (6) - Implemented
+- [x] Status flag tests (2) - Implemented
+- [x] Circuit breaker tests (2) - Implemented
+- [x] Invalid input tests (3) - Implemented
+- [x] Multiple flags tests (3) - Implemented
+- [x] Flag order tests (2) - Implemented
+- [x] Short flag equivalence tests (6) - Implemented (bonus)
+- [ ] Dedicated `--allowed-tools` validation test - Optional enhancement
+
+### Integration Tests
+- Existing integration tests in `tests/integration/` cover full loop execution
+
+---
+
+## Future Considerations
+
+### Patterns for Project Evolution
+- If new CLI flags are added, this test file provides a clear template
+- Consider extracting flag validation functions for easier unit testing
+
+### Technical Debt Items
+- Minor: Could add `--allowed-tools` dedicated test (non-blocking)
+
+---
+
+## Compliance & Best Practices
+
+### Testing Standards Met
+- ✅ BATS framework used consistently
+- ✅ Setup/teardown isolation pattern
+- ✅ Clear test naming conventions
+- ✅ Both positive and negative test cases
+- ✅ Boundary value testing
+
+### Enterprise Best Practices
+- Test file follows project conventions from `test_helper.bash`
+- Uses fixtures helper for consistency
+- Proper temp directory cleanup prevents resource leaks
+
+---
+
+## Action Items Summary
+
+### Immediate (Before Production)
+None - code is ready for merge
+
+### Short-term (Next Sprint)
+1. Consider adding dedicated `--allowed-tools` validation test (optional)
+
+### Long-term (Backlog)
+None identified
+
+---
+
+## Conclusion
+
+The CLI parsing test file is production-ready with excellent coverage of all CLI flags. The test design is sound, using the `--help` escape hatch pattern to validate argument parsing without triggering the main execution loop. Tests are well-isolated with proper resource cleanup.
+
+**Recommendation:** Approve for merge. The one minor issue (missing dedicated `--allowed-tools` test) is non-blocking since the flag is tested in combination with other flags.
+
+---
+
+## Appendix
+
+### Tools Used for Review
+- Manual code review
+- BATS test execution
+
+### References
+- BATS documentation
+- Project CLAUDE.md testing standards
+
+### Metrics
+- **Lines of Code Reviewed:** 354
+- **Test Cases Reviewed:** 26
+- **CLI Flags Covered:** 12/12 (100%)
diff --git a/tests/unit/test_cli_parsing.bats b/tests/unit/test_cli_parsing.bats