Security Scan #3

Workflow file for this run

.github/workflows/security-scan.yml at a893bce

	# Security Scan workflow.
	#
	# On a weekly schedule (or on manual dispatch) this workflow:
	#   1. collects the diffs of every commit from the last N days (markdown excluded),
	#   2. sends them to the security-scanner agent through the local action (`./`),
	#   3. checks that every file path named in the agent's report really exists,
	#   4. opens a GitHub issue when at least one finding survives that check,
	#   5. writes a short result to the job summary.
	name: Security Scan

	on:
	  schedule:
	    # Runs every Monday at 01:43 UTC
	    - cron: "43 1 * * 1"
	  workflow_dispatch:
	    inputs:
	      days_back:
	        description: "Number of days back to scan"
	        required: false
	        default: "7"
	        type: string

	permissions:
	  contents: read

	jobs:
	  security-scan:
	    name: Security Scan with cagent
	    runs-on: ubuntu-latest
	    env:
	      # 'true' only when the optional GitHub App credentials are configured
	      HAS_APP_SECRETS: ${{ secrets.CAGENT_REVIEWER_APP_ID != '' }}
	    permissions:
	      contents: read
	      issues: write
	    steps:
	      - name: Check out Git repository
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          fetch-depth: 0 # Need full history to get commits from past week

	      # Generate GitHub App token so issues appear as the custom app
	      # (optional - the issue step falls back to github.token)
	      - name: Get GitHub App token
	        id: app-token
	        if: env.HAS_APP_SECRETS == 'true'
	        continue-on-error: true # Don't fail workflow if token generation fails
	        uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a # v2
	        with:
	          app_id: ${{ secrets.CAGENT_REVIEWER_APP_ID }}
	          private_key: ${{ secrets.CAGENT_REVIEWER_APP_PRIVATE_KEY }}

	      # Outputs: commit_count, has_commits, prompt (only when has_commits=true)
	      - name: Get commits from past week
	        id: commits
	        env:
	          # Scheduled runs have no inputs, so fall back to 7 days
	          DAYS_BACK: ${{ inputs.days_back || '7' }}
	        run: |
	          # days_back is free-form text on manual dispatch; insist on digits only
	          # before it is handed to `date`.
	          case "$DAYS_BACK" in
	            ''|*[!0-9]*)
	              echo "::error::days_back must be a whole number, got '$DAYS_BACK'"
	              exit 1
	              ;;
	          esac
	          echo "Scanning commits from the past $DAYS_BACK days..."

	          # Get commits from the past N days (GNU date first, BSD date as fallback)
	          SINCE_DATE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%d 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%d)
	          echo "Since: $SINCE_DATE"

	          # tformat: terminates EVERY hash with a newline. With format: the last
	          # hash has no trailing newline and the `while read` loop below would
	          # silently skip it (the oldest commit would never be scanned).
	          git log --since="$SINCE_DATE" --pretty=tformat:%H > /tmp/commit_hashes.txt

	          # grep -c prints "0" AND exits 1 when nothing matches, so only neutralise
	          # the exit status; echoing a fallback "0" would produce the value "0\n0".
	          COMMIT_COUNT=$(grep -c . /tmp/commit_hashes.txt || true)
	          COMMIT_COUNT=${COMMIT_COUNT:-0}

	          echo "Found $COMMIT_COUNT commits to scan"
	          echo "commit_count=$COMMIT_COUNT" >> "$GITHUB_OUTPUT"

	          if [ "$COMMIT_COUNT" -eq 0 ]; then
	            echo "No commits found in the past $DAYS_BACK days"
	            echo "has_commits=false" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          echo "has_commits=true" >> "$GITHUB_OUTPUT"

	          # Generate full diffs for all commits (excluding markdown files)
	          # Markdown is excluded because:
	          # 1. Docs don't need security scanning
	          # 2. Security docs contain example attack patterns that trigger false positives
	          echo "Generating commit diffs (excluding *.md files)..."
	          : > /tmp/all_diffs.txt

	          while IFS= read -r commit_hash; do
	            {
	              echo "----------------------------------------"
	              echo "Commit: $commit_hash"
	              echo "Subject: $(git log -1 --pretty=format:%s "$commit_hash")"
	              echo "Author: $(git log -1 --pretty=format:%an "$commit_hash")"
	              echo "Date: $(git log -1 --pretty=format:%ci "$commit_hash")"
	              echo ""
	              # Exclude markdown files from diff (-- . ':!*.md' is git pathspec syntax)
	              git show --pretty=format: --patch "$commit_hash" -- . ':!*.md'
	              echo ""
	              echo ""
	            } >> /tmp/all_diffs.txt
	          done < /tmp/commit_hashes.txt

	          # Show size of diff file
	          DIFF_SIZE=$(wc -c < /tmp/all_diffs.txt | tr -d ' ')
	          echo "Total diff size: $DIFF_SIZE bytes"

	          # If diff is too large (>100KB), warn but continue
	          if [ "$DIFF_SIZE" -gt 100000 ]; then
	            echo "⚠️ Warning: Diff is large ($DIFF_SIZE bytes). AI analysis may be truncated."
	          fi

	          # Create the full prompt for security scanning
	          cat > /tmp/scan_prompt.txt <<EOF
	          Analyze these commits from the past $DAYS_BACK days for security vulnerabilities.

	          Total commits: $COMMIT_COUNT

	          EOF

	          # Append the diffs to the prompt
	          cat /tmp/all_diffs.txt >> /tmp/scan_prompt.txt

	          # Set prompt as multiline output. The delimiter is random because the
	          # payload is arbitrary commit content: a fixed delimiter appearing on a
	          # line of its own would end the value early and let the rest of the diff
	          # be parsed as further outputs.
	          PROMPT_DELIM="PROMPT_EOF_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')"
	          {
	            echo "prompt<<$PROMPT_DELIM"
	            cat /tmp/scan_prompt.txt
	            echo "$PROMPT_DELIM"
	          } >> "$GITHUB_OUTPUT"

	      - name: Run security scan
	        id: scan
	        if: steps.commits.outputs.has_commits == 'true'
	        uses: ./
	        with:
	          agent: agentcatalog/github-action-security-scanner
	          prompt: ${{ steps.commits.outputs.prompt }}
	          anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
	          timeout: 300 # 5 minutes

	      # Outputs: has_valid_report, has_security_issues, has_hallucinations,
	      # hallucination_count (only when hallucinations were found)
	      - name: Validate reported file paths
	        id: validate
	        if: steps.commits.outputs.has_commits == 'true'
	        env:
	          OUTPUT_FILE: ${{ steps.scan.outputs.output-file }}
	        run: |
	          if [ ! -f "$OUTPUT_FILE" ]; then
	            echo "No output file found"
	            echo "has_valid_report=false" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          # Check if the report contains "No security issues detected"
	          if grep -q "No security issues detected" "$OUTPUT_FILE"; then
	            echo "✅ No security issues found"
	            {
	              echo "has_valid_report=false"
	              echo "has_security_issues=false"
	              echo "has_hallucinations=false"
	            } >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          # If we get here, the report claims to have found issues
	          echo "has_security_issues=true" >> "$GITHUB_OUTPUT"

	          # Extract file paths from the report.
	          # Expected format in the report: File: `path/to/file.ts:123`
	          # (optionally bold, i.e. **File:** `path/to/file.ts:123`).
	          # Only lines matching this format will be extracted.
	          echo "Extracting file paths from security report..."
	          echo "Expected format: File: \`path/to/file.ts:123\`"

	          # Use sed for portability (works on both GNU and BSD).
	          # Captures everything between the opening backtick and the first ':' or
	          # closing backtick, i.e. the path without its line number.
	          # The command is tested inside `if` because the step shell exits on the
	          # first failing command; a separate `$?` check would never be reached.
	          if ! sed -n 's/.*File:[*]* *`\([^:`]*\).*/\1/p' "$OUTPUT_FILE" > /tmp/reported_files.txt; then
	            echo "⚠️ Warning: sed command failed during file path extraction"
	          fi

	          REPORTED_COUNT=$(wc -l < /tmp/reported_files.txt | tr -d ' ')
	          echo "Found $REPORTED_COUNT file paths in report"

	          if [ "$REPORTED_COUNT" -eq 0 ]; then
	            echo "⚠️ Warning: Report claims issues but contains no file paths in expected format"
	            echo "This could mean the AI agent output format changed, or issues lack specific file references"
	            echo "has_valid_report=true" >> "$GITHUB_OUTPUT"
	            echo "has_hallucinations=false" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          # Validate each file path exists in the repository
	          : > /tmp/invalid_files.txt
	          : > /tmp/valid_files.txt

	          while IFS= read -r filepath; do
	            # `--` stops a reported path that begins with '-' being read as a git option
	            if [ -f "$filepath" ] || git ls-files --error-unmatch -- "$filepath" >/dev/null 2>&1; then
	              echo "$filepath" >> /tmp/valid_files.txt
	              echo " ✓ Valid: $filepath"
	            else
	              echo "$filepath" >> /tmp/invalid_files.txt
	              echo " ✗ INVALID (hallucinated): $filepath"
	            fi
	          done < /tmp/reported_files.txt

	          INVALID_COUNT=$(wc -l < /tmp/invalid_files.txt | tr -d ' ')
	          VALID_COUNT=$(wc -l < /tmp/valid_files.txt | tr -d ' ')

	          echo ""
	          echo "Validation results:"
	          echo " - Valid file paths: $VALID_COUNT"
	          echo " - Invalid file paths (hallucinations): $INVALID_COUNT"

	          if [ "$INVALID_COUNT" -gt 0 ]; then
	            echo "has_hallucinations=true" >> "$GITHUB_OUTPUT"
	            echo "hallucination_count=$INVALID_COUNT" >> "$GITHUB_OUTPUT"

	            # Create warning report (prepended to the issue body by a later step)
	            {
	              echo "⚠️ AI HALLUCINATION DETECTED"
	              echo ""
	              echo "The security scanner reported $INVALID_COUNT issue(s) in files that don't exist:"
	              echo ""
	              while IFS= read -r filepath; do
	                echo "- \`$filepath\`"
	              done < /tmp/invalid_files.txt
	              echo ""
	              echo "These are AI hallucinations and have been filtered out."
	              echo ""
	            } > /tmp/hallucination_warning.md
	          else
	            echo "has_hallucinations=false" >> "$GITHUB_OUTPUT"
	          fi

	          if [ "$VALID_COUNT" -gt 0 ]; then
	            echo "✅ Found $VALID_COUNT valid security issues"
	            echo "has_valid_report=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "✅ All reported issues were hallucinations - no real issues found"
	            echo "has_valid_report=false" >> "$GITHUB_OUTPUT"
	          fi

	      # Output: has_issues ('true' when an issue should be opened)
	      - name: Check if issues were found
	        id: check-issues
	        if: steps.commits.outputs.has_commits == 'true'
	        env:
	          # Step outputs are passed through env rather than expanded into the
	          # script text, matching the other steps of this workflow.
	          HAS_SECURITY_ISSUES: ${{ steps.validate.outputs.has_security_issues }}
	          HAS_VALID_REPORT: ${{ steps.validate.outputs.has_valid_report }}
	          HAS_HALLUCINATIONS: ${{ steps.validate.outputs.has_hallucinations }}
	        run: |
	          # Only create issue if security issues were actually found
	          if [ "$HAS_SECURITY_ISSUES" = "true" ] && [ "$HAS_VALID_REPORT" = "true" ]; then
	            if [ "$HAS_HALLUCINATIONS" = "true" ]; then
	              echo "⚠️ Valid security issues found, but some hallucinations were filtered"
	            fi
	            echo "has_issues=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "✅ No security issues to report"
	            echo "has_issues=false" >> "$GITHUB_OUTPUT"
	          fi

	      - name: Create security issue
	        if: steps.check-issues.outputs.has_issues == 'true'
	        env:
	          # Prefer the GitHub App token; fall back to the workflow token
	          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
	          OUTPUT_FILE: ${{ steps.scan.outputs.output-file }}
	          COMMIT_COUNT: ${{ steps.commits.outputs.commit_count }}
	          DAYS_BACK: ${{ inputs.days_back || '7' }}
	          HAS_HALLUCINATIONS: ${{ steps.validate.outputs.has_hallucinations }}
	          GITHUB_REPO: ${{ github.repository }}
	          GITHUB_SERVER_URL: ${{ github.server_url }}
	          GITHUB_RUN_ID: ${{ github.run_id }}
	        run: |
	          SCAN_DATE=$(date -u +"%Y-%m-%d")

	          # Create issue title
	          TITLE="🚨 Security Scan Results - $SCAN_DATE"

	          # Prepare issue body with header (quoted delimiter: no shell expansion)
	          cat > /tmp/issue_body.md <<'HEADER'
	          Automated security scan detected potential vulnerabilities in recent commits.

	          > ⚠️ This is an automated scan. Please review each finding carefully as there may be false positives.

	          ---

	          HEADER

	          # Add hallucination warning if detected
	          if [ "$HAS_HALLUCINATIONS" = "true" ]; then
	            cat /tmp/hallucination_warning.md >> /tmp/issue_body.md
	            echo "---" >> /tmp/issue_body.md
	            echo "" >> /tmp/issue_body.md
	          fi

	          # Append the scan results
	          cat "$OUTPUT_FILE" >> /tmp/issue_body.md

	          # Add footer (unquoted delimiter: the $VARIABLES below are expanded)
	          cat >> /tmp/issue_body.md <<FOOTER

	          ---

	          Scan Details:
	          - Commits scanned: $COMMIT_COUNT
	          - Time period: Past $DAYS_BACK days
	          - Scan date: $SCAN_DATE
	          - Workflow run: $GITHUB_SERVER_URL/$GITHUB_REPO/actions/runs/$GITHUB_RUN_ID

	          Next Steps:
	          1. Review each issue for validity (automated scans may have false positives)
	          2. Create separate issues for confirmed vulnerabilities if needed
	          3. Assign priority based on severity and exploitability
	          4. Close this issue once all findings have been addressed or dismissed

	          /label security
	          FOOTER

	          # Create the issue
	          gh issue create \
	            --repo "$GITHUB_REPO" \
	            --title "$TITLE" \
	            --body-file /tmp/issue_body.md \
	            --label "security"

	          echo "✅ Security issue created"

	      - name: Post success message
	        if: steps.check-issues.outputs.has_issues == 'false' && steps.commits.outputs.has_commits == 'true'
	        env:
	          COMMIT_COUNT: ${{ steps.commits.outputs.commit_count }}
	        run: |
	          echo "✅ Security scan completed successfully"
	          echo "✅ No security issues detected in $COMMIT_COUNT commits"
	          echo ""
	          echo "To view the full scan results, check the workflow output."

	      # Runs even after a failed step (always()) so the summary is never missing
	      - name: Post summary
	        if: always() && steps.commits.outputs.has_commits == 'true'
	        env:
	          COMMIT_COUNT: ${{ steps.commits.outputs.commit_count }}
	          DAYS_BACK: ${{ inputs.days_back || '7' }}
	          HAS_HALLUCINATIONS: ${{ steps.validate.outputs.has_hallucinations }}
	          HALLUCINATION_COUNT: ${{ steps.validate.outputs.hallucination_count }}
	          HAS_ISSUES: ${{ steps.check-issues.outputs.has_issues }}
	        run: |
	          {
	            echo "## Security Scan Summary"
	            echo ""
	            echo "Commits scanned: $COMMIT_COUNT"
	            echo "Time period: Past $DAYS_BACK days"
	            echo ""
	            if [ "$HAS_HALLUCINATIONS" = "true" ]; then
	              echo "⚠️ AI Hallucinations Detected: $HALLUCINATION_COUNT file(s)"
	              echo ""
	              echo "The AI agent reported security issues in files that don't exist."
	              echo "These have been filtered out automatically."
	              echo ""
	            fi
	            if [ "$HAS_ISSUES" = "true" ]; then
	              echo "Result: 🚨 Security issues detected"
	              echo ""
	              echo "A GitHub issue has been created with detailed findings."
	            elif [ "$HAS_ISSUES" = "false" ]; then
	              echo "Result: ✅ No security issues detected"
	            else
	              # has_issues is empty when an earlier step failed or was skipped;
	              # do not report a clean result for a scan that never finished.
	              echo "Result: ⚠️ Scan did not complete - check the workflow logs"
	            fi
	          } >> "$GITHUB_STEP_SUMMARY"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Security Scan #3

Workflow file

Security Scan #3

Uh oh!

Workflow file for this run