Skip to content

Add content related to summary index #226

Add content related to summary index

Add content related to summary index #226

# Workflow entry point: inspects documentation changes made by a pull request.
name: Analyze Documentation Changes

on:
  pull_request:
    # Only pull requests targeting these branches start the workflow.
    branches:
      - main
      - revamp
    types:
      - opened
      - synchronize
      - reopened
    paths:
      # IMPORTANT: keep this list aligned with the language directories
      # defined in tools/translate/config.json. Whenever a language is added
      # to or removed from config.json, update the filters below to match.
      # NOTE(review): the previous wording named the languages as
      # "en (source), cn, jp (targets)", while the filters below use the
      # directories en/, zh/ and ja/ - confirm which naming config.json uses.
      - 'docs.json'
      - 'en/**/*.md'
      - 'en/**/*.mdx'
      - 'en/**/openapi*.json'
      - 'zh/**/*.md'
      - 'zh/**/*.mdx'
      - 'zh/**/openapi*.json'
      - 'ja/**/*.md'
      - 'ja/**/*.mdx'
      - 'ja/**/openapi*.json'
      - 'versions/**/*.md'
      - 'versions/**/*.mdx'

# The workflow token is limited to read access.
permissions:
  contents: read
  pull-requests: read
# Job definition: a single job, `analyze`, executed on a GitHub-hosted Ubuntu
# runner. Its steps follow the `steps:` key.
# NOTE(review): the indentation of this file appears to have been lost;
# presumably `analyze` belongs under `jobs`, and `runs-on` / `steps` under
# `analyze` - confirm against the committed workflow file.
jobs:
analyze:
runs-on: ubuntu-latest
steps:
# Fetch the repository with its complete history: later steps run
# `git merge-base` and `git diff` against older commits, which a shallow
# clone would not contain.
- name: Checkout PR
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
# Provide the Python interpreter used by the tools/translate scripts.
# setup-python v5 replaces v4, which runs on the deprecated Node 16 action
# runtime; the `python-version` input is unchanged between the two.
# The version stays quoted so YAML never reads it as a float.
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    python-version: '3.9'
# Decide which two commits the later steps compare.
#
# Outputs:
#   compare_base   - commit to diff from
#   compare_head   - commit to diff to (always the PR head)
#   is_incremental - "true" for synchronize events, "false" otherwise
#
# On `synchronize` the base is, in order of preference: the commit recorded in
# a "Last-Processed-Commit:" comment on the open translation PR, then
# `github.event.before`, then the PR base. A candidate is only accepted when it
# exists in the local clone, because a commit that is no longer reachable
# (e.g. after a force-push) would make every later `git diff` fail.
# For all other events the merge-base of PR base and PR head is used.
- name: Determine comparison range
  id: determine-range
  env:
    GH_TOKEN: ${{ github.token }}
    # Event data is passed through the environment rather than being pasted
    # into the script text.
    PR_NUMBER: ${{ github.event.pull_request.number }}
    EVENT_ACTION: ${{ github.event.action }}
    PR_BASE: ${{ github.event.pull_request.base.sha }}
    PR_HEAD: ${{ github.event.pull_request.head.sha }}
    EVENT_BEFORE: ${{ github.event.before }}
  run: |
    echo "Determining comparison range..."
    ZERO_SHA="0000000000000000000000000000000000000000"

    # Succeeds when $1 names a commit object present in the local clone.
    commit_exists() {
      [ -n "$1" ] && git cat-file -e "$1^{commit}" 2>/dev/null
    }

    if [ "$EVENT_ACTION" = "synchronize" ]; then
      echo "🔄 Synchronize event - detecting incremental changes"

      # Try to get last processed commit from translation PR
      TRANSLATION_PR=$(gh pr list \
        --search "head:docs-sync-pr-${PR_NUMBER} state:open" \
        --json number \
        --jq '.[0].number // empty' 2>/dev/null || echo "")

      LAST_PROCESSED=""
      if [ -n "$TRANSLATION_PR" ]; then
        echo "Found translation PR #${TRANSLATION_PR}"
        # Extract last processed commit from comments (reverse order to get latest)
        LAST_PROCESSED=$(gh pr view "$TRANSLATION_PR" \
          --json comments \
          --jq '.comments | reverse | .[] | .body' 2>/dev/null \
          | grep -oP 'Last-Processed-Commit: \K[a-f0-9]+' \
          | head -1 || echo "")

        if [ -n "$LAST_PROCESSED" ]; then
          if commit_exists "$LAST_PROCESSED"; then
            echo "✅ Found tracked commit in translation PR: $LAST_PROCESSED"
          else
            # The value comes from PR comments, so it may be stale or bogus.
            echo "⚠️ Tracked commit $LAST_PROCESSED is not available locally, ignoring it"
            LAST_PROCESSED=""
          fi
        fi
      fi

      # Use tracked commit if available, otherwise fall back to github.event.before
      if [ -n "$LAST_PROCESSED" ]; then
        COMPARE_BASE="$LAST_PROCESSED"
        echo "Using last processed commit: $COMPARE_BASE"
      elif [ -n "$EVENT_BEFORE" ] && [ "$EVENT_BEFORE" != "$ZERO_SHA" ] && commit_exists "$EVENT_BEFORE"; then
        COMPARE_BASE="$EVENT_BEFORE"
        echo "Using github.event.before: $COMPARE_BASE"
      else
        # Fallback to PR base (first push after PR creation, or the previous
        # commit is no longer available in the clone)
        COMPARE_BASE="$PR_BASE"
        echo "⚠️ No previous commit found, using PR base: $COMPARE_BASE"
      fi

      COMPARE_HEAD="$PR_HEAD"
      IS_INCREMENTAL="true"
    else
      echo "🆕 New PR event - analyzing full changes"
      # Use merge-base to find where branch diverged from main
      # This allows stale branches to trigger automation without false "mixed content" errors
      MERGE_BASE=$(git merge-base "$PR_BASE" "$PR_HEAD")
      echo "Branch diverged from main at: $MERGE_BASE"
      COMPARE_BASE="$MERGE_BASE"
      COMPARE_HEAD="$PR_HEAD"
      IS_INCREMENTAL="false"
    fi

    {
      echo "compare_base=$COMPARE_BASE"
      echo "compare_head=$COMPARE_HEAD"
      echo "is_incremental=$IS_INCREMENTAL"
    } >> "$GITHUB_OUTPUT"
    echo "📊 Comparison range: $COMPARE_BASE...$COMPARE_HEAD"
# Run tools/translate/pr_analyzer.py on the comparison range and publish its
# verdict.
#
# Outputs on success: pr_type, should_skip.
# Outputs on failure: error (always "mixed_pr") and error_message; the step
# then exits with status 1.
#
# The analyzer is invoked directly inside the `if` condition. GitHub runs
# `run:` scripts with `bash -e`, so a failing command placed on its own line
# would abort the script before a later `$?` test and the failure branch
# would never execute.
- name: Categorize and validate PR changes
  id: categorize
  env:
    # Comparison range from the determine-range step
    BASE_SHA: ${{ steps.determine-range.outputs.compare_base }}
    HEAD_SHA: ${{ steps.determine-range.outputs.compare_head }}
  run: |
    echo "Categorizing PR changes..."
    echo "Base SHA: $BASE_SHA"
    echo "Head SHA: $HEAD_SHA"

    # Run PR analyzer
    cd tools/translate
    if python pr_analyzer.py "$BASE_SHA" "$HEAD_SHA" > /tmp/pr_analysis_output.txt 2>&1; then
      # Successful analysis: the output file is read as shell assignments
      # that define pr_type and should_skip.
      # NOTE(review): this executes the analyzer's combined stdout/stderr as
      # shell code; consider having the analyzer emit a dedicated,
      # assignments-only file instead.
      source /tmp/pr_analysis_output.txt
      echo "PR categorization successful"
      echo "PR Type: $pr_type"
      echo "Should Skip: $should_skip"

      # Set GitHub outputs
      {
        echo "pr_type=$pr_type"
        echo "should_skip=$should_skip"
      } >> "$GITHUB_OUTPUT"

      if [ "$should_skip" = "true" ]; then
        if [ "$pr_type" = "translation" ]; then
          echo "✅ Translation-only PR detected. Skipping automation (direct review process)."
        elif [ "$pr_type" = "none" ]; then
          echo "✅ No relevant documentation changes detected. Skipping workflow."
        fi
        exit 0
      fi
    else
      # Analysis failed - likely mixed PR
      echo "PR categorization failed - likely mixed content PR"
      # Show what the analyzer printed so the failure can be diagnosed.
      cat /tmp/pr_analysis_output.txt

      # `|| true` keeps `bash -e` from aborting if the pipeline fails; the
      # default text applies whenever no error_message line was produced.
      ERROR_MESSAGE=$(grep "error_message=" /tmp/pr_analysis_output.txt | cut -d'=' -f2- || true)
      ERROR_MESSAGE="${ERROR_MESSAGE:-Mixed content PR detected}"

      {
        echo "error=mixed_pr"
        echo "error_message<<EOF"
        echo "$ERROR_MESSAGE"
        echo "EOF"
      } >> "$GITHUB_OUTPUT"
      exit 1
    fi
# Collect facts about a source-language PR for the translation pipeline.
#
# Files written:
#   /tmp/analysis.json          - PR metadata report
#   /tmp/changed_files.txt      - every path changed in the comparison range
#   /tmp/file_analysis.txt      - "path|size|markdown" per changed en/ md/mdx file
#   /tmp/openapi_analysis.txt   - "path|size|openapi_json" per changed en/ OpenAPI file
#   /tmp/docs_json_changed.txt  - "true" or "false"
#   /tmp/structure_changes.json - docs.json structure summary
#
# Outputs: has_changes=true on success; error=too_many_files or
# error=file_too_large (followed by exit 1) when a limit is exceeded.
#
# All pull-request data (title, author, ...) reaches the script through `env:`
# and the heredocs are quoted. The PR title is attacker-controlled; expanding
# it inside the script text would let it run shell commands and would produce
# invalid JSON whenever it contains a double quote.
- name: Analyze source language changes for translation
  if: steps.categorize.outputs.pr_type == 'source'
  id: analyze
  env:
    BASE_SHA: ${{ steps.determine-range.outputs.compare_base }}
    HEAD_SHA: ${{ steps.determine-range.outputs.compare_head }}
    IS_INCREMENTAL: ${{ steps.determine-range.outputs.is_incremental }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_AUTHOR: ${{ github.event.pull_request.user.login }}
    EVENT_ACTION: ${{ github.event.action }}
    REPOSITORY: ${{ github.repository }}
    GIT_REF: ${{ github.ref }}
  run: |
    echo "Analyzing source language changes for automatic translation..."
    echo "Comparison: $BASE_SHA...$HEAD_SHA"
    echo "Incremental: $IS_INCREMENTAL"

    # Get all changed files (not just English ones for file analysis)
    CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA")

    # Count non-empty lines: `wc -l` would report 1 for an empty diff.
    # `|| true` because grep exits non-zero when the count is 0.
    FILE_COUNT=$(printf '%s\n' "$CHANGED_FILES" | grep -c . || true)
    export FILE_COUNT
    echo "Changed files count: $FILE_COUNT"

    # Security check: Limit number of files
    MAX_FILES=50
    if [ "$FILE_COUNT" -gt "$MAX_FILES" ]; then
      echo "Error: Too many files changed ($FILE_COUNT > $MAX_FILES)"
      echo "error=too_many_files" >> "$GITHUB_OUTPUT"
      exit 1
    fi

    # Create analysis report (serialised by json.dump, so any title is safe)
    python3 - <<'EOF'
    import json
    import os
    from datetime import datetime, timezone

    env = os.environ
    report = {
        "pr_number": int(env["PR_NUMBER"]),
        "pr_title": env.get("PR_TITLE", ""),
        "pr_author": env.get("PR_AUTHOR", ""),
        "base_sha": env["BASE_SHA"],
        "head_sha": env["HEAD_SHA"],
        "is_incremental": env.get("IS_INCREMENTAL") == "true",
        "event_action": env.get("EVENT_ACTION", ""),
        "file_count": int(env["FILE_COUNT"]),
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "repository": env.get("REPOSITORY", ""),
        "ref": env.get("GIT_REF", ""),
        "pr_type": "source",
    }
    with open("/tmp/analysis.json", "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    EOF

    # Save changed files list
    echo "$CHANGED_FILES" > /tmp/changed_files.txt

    # Analyze file types and sizes for source language files that need translation
    : > /tmp/file_analysis.txt
    : > /tmp/openapi_analysis.txt

    # Security check: File size limit (10MB), shared by both file kinds
    MAX_SIZE=$((10 * 1024 * 1024))

    while IFS= read -r file; do
      # Deleted or renamed-away paths no longer exist in the checkout.
      if [ ! -f "$file" ]; then
        continue
      fi

      if [[ "$file" =~ ^en/.*\.(md|mdx)$ ]]; then
        KIND="markdown"
        LABEL="File"
        REPORT=/tmp/file_analysis.txt
      elif [[ "$file" =~ ^en/.*/openapi.*\.json$ ]]; then
        KIND="openapi_json"
        LABEL="OpenAPI file"
        REPORT=/tmp/openapi_analysis.txt
      else
        continue
      fi

      # GNU stat first (the runner is Ubuntu), BSD stat as a fallback.
      SIZE=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null || echo "0")
      echo "$file|$SIZE|$KIND" >> "$REPORT"

      if [ "$SIZE" -gt "$MAX_SIZE" ]; then
        echo "Error: $LABEL $file exceeds size limit ($SIZE > $MAX_SIZE)"
        echo "error=file_too_large" >> "$GITHUB_OUTPUT"
        exit 1
      fi
    done <<< "$CHANGED_FILES"

    # Check for docs.json changes
    if grep -q '^docs\.json$' <<< "$CHANGED_FILES"; then
      echo "true" > /tmp/docs_json_changed.txt

      # Use PR analyzer's docs.json analysis
      cd tools/translate
      python3 - <<'EOF'
    import json
    import os
    import sys

    sys.path.append('.')
    from pr_analyzer import PRAnalyzer

    analyzer = PRAnalyzer(os.environ["BASE_SHA"], os.environ["HEAD_SHA"])
    docs_changes = analyzer.analyze_docs_json_changes()

    structure_changes = {
        "structure_changed": docs_changes["any_docs_json_changes"],
        "navigation_modified": docs_changes["source_section"],
        "languages_affected": analyzer.config["target_languages"] if docs_changes["source_section"] else []
    }

    with open("/tmp/structure_changes.json", "w") as f:
        json.dump(structure_changes, f, indent=2)
    EOF
    else
      echo "false" > /tmp/docs_json_changed.txt
      echo '{"structure_changed": false, "navigation_modified": false, "languages_affected": []}' > /tmp/structure_changes.json
    fi

    echo "has_changes=true" >> "$GITHUB_OUTPUT"
    echo "Analysis complete"
# Sanity-check every path queued for translation before it is handed on.
# Each line of the two report files has the form "path|size|kind"; only the
# path is inspected. The step stops with exit status 1 at the first path that
# contains "../", has an unexpected extension, or does not live under en/.
- name: Validate file paths
  if: steps.analyze.outputs.has_changes == 'true'
  run: |
    echo "Validating source language file paths for translation..."

    # Security: Validate source language files that will be translated
    while IFS='|' read -r entry _rest; do
      if [ -z "$entry" ]; then
        continue
      fi

      # Directory traversal attempt
      if [[ "$entry" == *../* ]]; then
        echo "Error: Invalid file path detected: $entry"
        exit 1
      fi

      # Only markdown sources are translated
      if [[ ! "$entry" =~ \.(md|mdx)$ ]]; then
        echo "Error: Invalid file type for translation: $entry"
        exit 1
      fi

      # Only source language files (en/) need translation
      if [[ "$entry" != en/* ]]; then
        echo "Error: Non-source-language file in translation list: $entry"
        exit 1
      fi
    done < /tmp/file_analysis.txt

    # Validate OpenAPI JSON files (only when the report exists and is non-empty)
    if [[ -f /tmp/openapi_analysis.txt && -s /tmp/openapi_analysis.txt ]]; then
      while IFS='|' read -r entry _size _kind; do
        if [ -z "$entry" ]; then
          continue
        fi

        # Directory traversal attempt
        if [[ "$entry" == *../* ]]; then
          echo "Error: Invalid file path: $entry"
          exit 1
        fi

        # Must be a JSON file
        if [[ ! "$entry" =~ \.json$ ]]; then
          echo "Error: Invalid OpenAPI file type: $entry"
          exit 1
        fi

        # Must live under en/
        if [[ "$entry" != en/* ]]; then
          echo "Error: Non-source-language OpenAPI file in translation list: $entry"
          exit 1
        fi

        # Must match the openapi*.json naming pattern
        if [[ ! "$entry" =~ openapi.*\.json$ ]]; then
          echo "Error: File doesn't match OpenAPI pattern: $entry"
          exit 1
        fi
      done < /tmp/openapi_analysis.txt
    fi

    echo "All source language file paths validated for translation"
# Build the sync plan with SyncPlanGenerator (tools/translate/pr_analyzer.py),
# attach PR metadata to it, and write:
#   /tmp/analysis.json  - the metadata alone (kept for the execute workflow)
#   /tmp/sync_plan.json - the complete plan
#
# Pull-request data is read from environment variables inside a quoted
# heredoc. The PR title is attacker-controlled; expanding it into the Python
# source would allow both shell command substitution and Python code
# injection.
- name: Create analysis summary
  if: steps.analyze.outputs.has_changes == 'true'
  env:
    BASE_SHA: ${{ steps.determine-range.outputs.compare_base }}
    HEAD_SHA: ${{ steps.determine-range.outputs.compare_head }}
    IS_INCREMENTAL: ${{ steps.determine-range.outputs.is_incremental }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_AUTHOR: ${{ github.event.pull_request.user.login }}
    EVENT_ACTION: ${{ github.event.action }}
    REPOSITORY: ${{ github.repository }}
    GIT_REF: ${{ github.ref }}
  run: |
    echo "Creating analysis summary for source language changes..."

    # Use SyncPlanGenerator for consistent logic across workflows
    cd tools/translate
    python3 - <<'EOF'
    import json
    import os
    import sys
    from datetime import datetime, timezone

    sys.path.append('.')
    from pr_analyzer import SyncPlanGenerator

    env = os.environ

    # Generate sync plan using centralized logic
    generator = SyncPlanGenerator(env["BASE_SHA"], env["HEAD_SHA"])
    sync_plan = generator.generate_sync_plan()

    # Add PR metadata to sync plan
    sync_plan["metadata"].update({
        "pr_number": int(env["PR_NUMBER"]),
        "pr_title": env.get("PR_TITLE", ""),
        "pr_author": env.get("PR_AUTHOR", ""),
        "event_action": env.get("EVENT_ACTION", ""),
        "is_incremental": env.get("IS_INCREMENTAL") == "true",
        "file_count": len(sync_plan["files_to_sync"]) + len(sync_plan["openapi_files_to_sync"]),
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "repository": env.get("REPOSITORY", ""),
        "ref": env.get("GIT_REF", ""),
        "pr_type": "source"
    })

    # Save analysis.json (for backward compatibility with execute workflow)
    with open("/tmp/analysis.json", "w") as f:
        json.dump(sync_plan["metadata"], f, indent=2)

    # Save sync plan
    with open("/tmp/sync_plan.json", "w") as f:
        json.dump(sync_plan, f, indent=2)

    print("Source language sync plan created:")
    print(f"  - {len(sync_plan['files_to_sync'])} markdown files to translate")
    print(f"  - {len(sync_plan['openapi_files_to_sync'])} OpenAPI JSON files to translate")
    if sync_plan['structure_changes'].get('structure_changed'):
        print("  - Documentation structure changes detected")
    EOF
# Publish the files written by the earlier steps as a single artifact.
# The artifact name carries the run id, and the artifact is kept for one day.
# The step only runs when the analyze step reported has_changes == 'true'.
- name: Upload analysis artifacts
  if: steps.analyze.outputs.has_changes == 'true'
  uses: actions/upload-artifact@v4
  with:
    name: docs-sync-analysis-${{ github.run_id }}
    # One path per line inside the literal block scalar.
    path: |
      /tmp/analysis.json
      /tmp/changed_files.txt
      /tmp/file_analysis.txt
      /tmp/openapi_analysis.txt
      /tmp/sync_plan.json
      /tmp/docs_json_changed.txt
      /tmp/structure_changes.json
    retention-days: 1