FIX: Add fallback validation for CI environments without pdftotext #49
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: builds the BOOST documentation and, on main, deploys it to
# GitHub Pages.
name: 🚀 Build & Deploy Documentation

on:
  # Pushes to main that touch the specification, the schemas, or any workflow.
  push:
    branches:
      - main  # Full build + deploy
    paths:
      - 'drafts/current/specifications/**'
      - 'drafts/current/schema/**'
      - '.github/workflows/**'
  # Allow manual triggering
  workflow_dispatch:
    inputs:
      # NOTE(review): no job visible in this workflow reads `inputs.deploy`;
      # confirm whether the deploy job is meant to be gated on it.
      deploy:
        description: 'Deploy to GitHub Pages (main branch only)'
        required: false
        type: boolean
        default: false

# Ensure only one build runs at a time per ref; a newer run cancels the
# previous one.
concurrency:
  group: build-deploy-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide environment: the short ref name that triggered the run.
env:
  BOOST_DOC_VERSION: ${{ github.ref_name }}

jobs:
# Job: derive the documentation version and a UTC build timestamp on the
# runner (outside the build container) and expose both as job outputs
# `version` and `build-timestamp`.
extract-version:
  name: 🏷️ Extract Version Info
  runs-on: ubuntu-latest
  timeout-minutes: 5
  outputs:
    version: ${{ steps.version-info.outputs.version }}
    build-timestamp: ${{ steps.version-info.outputs.timestamp }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Need full git history for version extraction

    - name: Extract version information
      id: version-info
      # Ref metadata is read from the runner's default environment variables
      # (GITHUB_REF_TYPE / GITHUB_REF_NAME / GITHUB_EVENT_NAME) rather than
      # interpolated with ${{ }} into the script text, so a ref name that
      # contains shell metacharacters cannot alter the script.
      run: |
        echo "🔍 Extracting version information (outside Docker container)..."
        # Determine version based on trigger type and git history
        if [ "$GITHUB_REF_TYPE" = "tag" ]; then
          # Tagged release - use exact tag
          VERSION="$GITHUB_REF_NAME"
          echo "📌 Using release tag: $VERSION"
        elif [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ] && [ -n "$BOOST_DOC_VERSION" ] && [ "$BOOST_DOC_VERSION" != "main" ]; then
          # Manual dispatch with specific version
          VERSION="$BOOST_DOC_VERSION"
          echo "🎯 Using manual version: $VERSION"
        else
          # Development build - use git describe for detailed version
          if git describe --tags >/dev/null 2>&1; then
            VERSION=$(git describe --tags --always)
            echo "🔧 Using development version: $VERSION"
          else
            # Fallback for repos without tags
            SHORT_SHA=$(git rev-parse --short HEAD)
            VERSION="v0.0.0-${SHORT_SHA}"
            echo "⚠️ No tags found, using commit: $VERSION"
          fi
        fi
        TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        # Store outputs (step output keys: `version`, `timestamp`)
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "timestamp=$TIMESTAMP" >> "$GITHUB_OUTPUT"
        echo "::notice title=Version Detected::$VERSION (detected outside Docker container)"
# Job: build the documentation inside the boost-builder container, using the
# version computed by `extract-version`.
build-documentation:
  name: 📚 Build Documentation
  runs-on: ubuntu-latest
  timeout-minutes: 15  # Prevent stuck builds - Docker should be ~2-3 min
  needs: extract-version
  container:
    image: ghcr.io/carbondirect/boost/boost-builder:latest
    options: --pull=missing
    env:
      RELEASE_VERSION: ${{ needs.extract-version.outputs.version }}
  outputs:
    version: ${{ needs.extract-version.outputs.version }}
    # `extract-version` declares its job outputs as `version` and
    # `build-timestamp`; there is no job output named `timestamp`.
    build-timestamp: ${{ needs.extract-version.outputs.build-timestamp }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Still need for schema validation scripts
# Log the versions of the tools the build relies on (Python, Bikeshed,
# pdflatex, Pandoc) as found in the job's container.
- name: Verify containerized environment
  run: |
    echo "🐳 Docker containerized build environment"
    echo "Python version: $(python3 --version)"
    echo "Bikeshed version: $(bikeshed --version)"
    echo "TeXLive available: $(which pdflatex && echo 'Yes' || echo 'No')"
    echo "Pandoc version: $(pandoc --version | head -1)"
# Echo the version/timestamp handed over by `extract-version` and warn (without
# failing) if the container's RELEASE_VERSION does not match.
- name: Confirm version information
  id: confirm-version
  run: |
    # Use pre-extracted version from previous job
    VERSION="${{ needs.extract-version.outputs.version }}"
    # The job output is named `build-timestamp`; `timestamp` is only the
    # step-level key inside extract-version and is not visible via `needs`.
    TIMESTAMP="${{ needs.extract-version.outputs.build-timestamp }}"
    echo "🏷️ Using pre-extracted version: $VERSION"
    echo "⏰ Build timestamp: $TIMESTAMP"
    echo "🐳 Container environment variable RELEASE_VERSION: $RELEASE_VERSION"
    # Verify the environment variable is set correctly
    if [ "$RELEASE_VERSION" != "$VERSION" ]; then
      echo "⚠️ Warning: Environment variable mismatch"
      echo " Pre-extracted: $VERSION"
      echo " Container env: $RELEASE_VERSION"
    else
      echo "✅ Version successfully passed to Docker container"
    fi
    echo "::notice title=Build Version::Building BOOST Documentation $VERSION"
# Run the repository's schema validation script from the specifications
# directory (the script path is relative to that working directory).
- name: Run comprehensive schema validation
  working-directory: drafts/current/specifications
  run: |
    echo "🔍 Running comprehensive schema validation..."
    python3 ../../../.github/scripts/validate-schemas.py
# List the Python and Debian packages the build depends on. Each grep is the
# last command of its pipeline, so a listing with no match ends the step with
# a non-zero status.
- name: Verify container dependencies
  run: |
    echo "🔍 Verifying container dependencies..."

    echo "Python packages:"
    pip3 list | grep -E "(pycairo|bikeshed|jsonschema|pydantic)"

    echo "System packages:"
    dpkg -l | grep -E "(pkg-config|libcairo2-dev|libgirepository)"

    echo "✅ Container dependencies verified"
# Run the consolidated build script (build.sh) that produces the HTML and PDF
# documentation.
- name: Build documentation (HTML and PDF)
  working-directory: drafts/current/specifications
  shell: bash
  run: |
    echo "🏗️ Building documentation with consolidated build system..."
    # Branch name comes from the default GITHUB_REF_NAME variable instead of
    # being interpolated into the script text with ${{ github.ref_name }}.
    echo "📋 Branch: $GITHUB_REF_NAME"
    echo "🎯 Build type: ${{ github.ref == 'refs/heads/main' && 'Production (with deployment)' || 'Development (build only)' }}"
    echo "🔧 Using version: $RELEASE_VERSION"
    echo "📦 Version source: Pre-extracted outside Docker container"
    chmod +x build.sh
    ./build.sh
# Sanity-check the generated HTML, the ERD Navigator files and the schema
# count. Hard failures exit 1; the sidebar-TOC and schema-count checks only
# print a warning.
- name: Validate build output
  working-directory: drafts/current/specifications
  run: |
    # Comprehensive validation of build output
    echo "🔍 Validating build output..."

    # --- HTML file exists and is plausibly complete ---
    [ -f "boost-spec.html" ] || {
      echo "❌ boost-spec.html was not generated"
      exit 1
    }
    html_bytes=$(wc -c < boost-spec.html)
    echo "📊 Generated HTML size: $(echo $html_bytes | numfmt --to=iec-i --suffix=B)"
    [ "$html_bytes" -ge 200000 ] || {
      echo "❌ Generated HTML file is too small ($html_bytes bytes)"
      exit 1
    }

    # --- HTML content ---
    grep -q "BOOST" boost-spec.html || {
      echo "❌ Generated HTML missing BOOST content"
      exit 1
    }

    # --- ReSpec-style layout structure (CRITICAL - DO NOT REMOVE) ---
    echo "🔍 Validating ReSpec-style layout system..."
    grep -q "main-content-wrapper" boost-spec.html || {
      echo "❌ ReSpec-style layout structure not applied correctly"
      echo "❌ CRITICAL: main-content-wrapper div missing - layout system broken!"
      echo "ℹ️ See ARCHITECTURE.md for details on hybrid Bikeshed+ReSpec system"
      exit 1
    }
    wrapper_hits=$(grep -c "main-content-wrapper" boost-spec.html)
    echo "✅ Found $wrapper_hits main-content-wrapper references"

    # --- Critical CSS must be embedded inline, not linked as an external file ---
    grep -q "ReSpec-Inspired BOOST Documentation Styles" boost-spec.html || {
      echo "❌ CRITICAL: ReSpec-style CSS not embedded - styling broken!"
      echo "ℹ️ This CSS is essential for the ReSpec-style layout system"
      echo "🔍 Looking for embedded CSS content rather than external file reference"
      exit 1
    }
    echo "✅ ReSpec-style CSS properly embedded"

    # --- Sidebar TOC structure (warning only) ---
    grep -q "toc.*sidebar" boost-spec.html || echo "⚠️ Warning: Sidebar TOC structure may be missing"
    echo "✅ ReSpec-style layout system validation passed"

    # --- ERD Navigator ---
    [ -f "erd-navigator/index.html" ] || {
      echo "❌ ERD Navigator missing"
      exit 1
    }
    [ -f "erd-navigator/erd-config.json" ] || {
      echo "❌ ERD Navigator configuration missing"
      exit 1
    }

    # --- Schema count (warning only) ---
    schema_total=$(find ../schema -name "validation_schema.json" | wc -l)
    echo "📊 Total schemas: $schema_total"
    if [ "$schema_total" -lt 30 ]; then
      echo "⚠️ Schema count lower than expected ($schema_total)"
    fi

    echo "✅ Build output validation passed"
# Report on the PDF produced by the build and copy it next to the HTML so the
# artifact upload picks it up. A missing PDF is logged but does not fail the
# step.
- name: Validate PDF generation
  if: always()  # Check PDF for all branches including main
  working-directory: drafts/current/specifications
  run: |
    # Branch name is read from GITHUB_REF_NAME rather than interpolated into
    # the script text with ${{ github.ref_name }}.
    echo "📄 Validating PDF generation for $GITHUB_REF_NAME branch..."
    # Check if PDF was generated by consolidated build system
    if [ -f "build/boost-spec.pdf" ]; then
      PDF_SIZE=$(wc -c < "build/boost-spec.pdf" | numfmt --to=iec-i --suffix=B)
      # A pipeline's status is that of its last command (awk), so a trailing
      # `|| echo Unknown` never runs when pdfinfo is missing or fails; apply
      # the fallback to the captured (possibly empty) value instead.
      PDF_PAGES=$(pdfinfo build/boost-spec.pdf 2>/dev/null | grep Pages | awk '{print $2}')
      PDF_PAGES=${PDF_PAGES:-Unknown}
      echo "✅ PDF generated by consolidated build system:"
      echo " 📄 File: build/boost-spec.pdf"
      echo " 📊 Size: $PDF_SIZE"
      echo " 📚 Pages: $PDF_PAGES"
      # Copy to root for artifact collection
      cp build/boost-spec.pdf ./boost-spec.pdf
    else
      echo "❌ PDF generation failed - build/boost-spec.pdf not found"
      echo "🔍 Contents of build directory:"
      ls -la build/ || echo "build directory not found"
    fi
# Run the optional HTML/PDF consistency script (warn-only) and then count the
# generated LaTeX entity tables (warn-only).
- name: Run documentation consistency validation
  if: always()
  working-directory: drafts/current/specifications
  run: |
    echo "🔍 Running HTML/PDF consistency validation..."
    if [ -f "scripts/validate-consistency.py" ]; then
      # Warn instead of failing. Using if/else rather than `|| { ...; exit 0; }`
      # keeps the step running, so the LaTeX entity-table check below is
      # still performed when the consistency script reports issues.
      if python3 scripts/validate-consistency.py --strict; then
        echo "✅ HTML and PDF documentation are consistent"
      else
        echo "⚠️ Consistency validation found issues"
        echo "📊 Check build/consistency-report.json for details"
      fi
    else
      echo "⚠️ Consistency validation script not found"
    fi
    # Validate generated LaTeX content
    if [ -d "tex/entities" ]; then
      ENTITY_COUNT=$(find tex/entities -name "*-table.tex" | wc -l)
      echo "📊 Generated entity tables: $ENTITY_COUNT"
      if [ "$ENTITY_COUNT" -lt 30 ]; then
        echo "⚠️ Entity table count lower than expected ($ENTITY_COUNT)"
      else
        echo "✅ Entity tables generated successfully"
      fi
    fi
# Write build-report.md (version, commit, branch, size/count statistics and a
# fixed checklist) and print it to the job log.
- name: Generate build report
  working-directory: drafts/current/specifications
  shell: bash
  run: |
    echo "📋 Generating build report..."
    # The heredoc delimiter is unquoted on purpose: $(...) and ${...} below are
    # expanded by bash when the report is written.
    # Version and build time come from the `extract-version` JOB via `needs`;
    # this job has no step with id `extract-version`, so `steps.extract-version`
    # would always render as an empty string.
    cat > build-report.md << EOF
    # BOOST Documentation Build Report
    **Version:** ${{ needs.extract-version.outputs.version }}
    **Build Time:** ${{ needs.extract-version.outputs.build-timestamp }}
    **Commit:** ${GITHUB_SHA::8}
    **Branch:** ${GITHUB_REF_NAME}
    **Build Type:** ${{ github.ref == 'refs/heads/main' && 'Production (with deployment)' || 'Development (build only)' }}
    ## Build Statistics
    - **HTML Size:** $(wc -c < boost-spec.html | numfmt --to=iec-i --suffix=B)
    - **Schema Files:** $(find ../schema -name "validation_schema.json" | wc -l)
    - **Dictionary Files:** $(find ../schema -name "*_dictionary.md" | wc -l)
    - **Include Modules:** $(find includes -name "*.md" | wc -l)
    ## Generated Artifacts
    - ✅ HTML Documentation (boost-spec.html)
    - ✅ ERD Navigator (erd-navigator/)
    - ✅ Schema Files (../schema/)
    - ✅ ReSpec Styling Applied
    ## Validation Results
    - ✅ Schema Validation Passed
    - ✅ FK Integrity Validation Passed
    - ✅ HTML Generation Successful
    - ✅ Content Validation Passed
    EOF
    echo "📋 Build Report Generated"
    cat build-report.md
# Publish the build outputs as one artifact named after the version and ref.
# Retention is 30 days for main and 7 days for any other ref.
- name: Upload build artifacts
  uses: actions/upload-artifact@v4
  with:
    name: boost-documentation-${{ needs.extract-version.outputs.version }}-${{ github.ref_name }}
    retention-days: ${{ github.ref == 'refs/heads/main' && 30 || 7 }}
    path: |
      drafts/current/specifications/boost-spec.html
      drafts/current/specifications/boost-spec-dev-*.pdf
      drafts/current/specifications/boost-spec.pdf
      drafts/current/specifications/build-report.md
      drafts/current/specifications/erd-navigator/
      drafts/current/schema/
# Job: publish the built documentation to GitHub Pages. Runs only for the
# main branch, after both the version and build jobs have finished.
deploy-github-pages:
  name: 🌐 Deploy to GitHub Pages
  runs-on: ubuntu-latest
  needs: [extract-version, build-documentation]
  if: github.ref == 'refs/heads/main'
  # Grant write permissions for Pages deployment
  permissions:
    contents: read
    pages: write
    id-token: write
  environment:
    name: github-pages
    url: ${{ steps.deployment.outputs.page_url }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Fetch the artifact uploaded by build-documentation (same name pattern)
    # into ./build-output.
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: boost-documentation-${{ needs.extract-version.outputs.version }}-${{ github.ref_name }}
        path: ./build-output
# Assemble ./pages-content from the downloaded artifact: the spec HTML and the
# schema directory are required (the step exits 1 without them); the ERD
# navigator, build report and PDF are copied when present. Then generate an
# index.html landing page plus VERSION and BUILD_TIME files.
- name: Prepare Pages deployment
  run: |
    mkdir -p ./pages-content
    # List downloaded artifacts for debugging.
    # The -name tests are grouped so that `-type f` applies to all three
    # patterns; without the parentheses, `-o` precedence limits `-type f`
    # to the *.html test only.
    echo "📋 Downloaded artifact structure:"
    find build-output -type f \( -name "*.html" -o -name "*.pdf" -o -name "*.md" \) | head -10
    # Copy main HTML documentation
    if [ -f "build-output/specifications/boost-spec.html" ]; then
      cp build-output/specifications/boost-spec.html ./pages-content/
      echo "✅ Copied boost-spec.html"
    else
      echo "❌ boost-spec.html not found at build-output/specifications/boost-spec.html"
      echo "📋 Available HTML files in build output:"
      find build-output -name "*.html" -type f
      exit 1
    fi
    # Copy ERD navigator if it exists
    if [ -d "build-output/specifications/erd-navigator" ]; then
      cp -r build-output/specifications/erd-navigator ./pages-content/
      echo "✅ Copied ERD navigator"
    fi
    # Copy build report if it exists
    if [ -f "build-output/specifications/build-report.md" ]; then
      cp build-output/specifications/build-report.md ./pages-content/
      echo "✅ Copied build report"
    fi
    # Copy PDF if it exists
    if [ -f "build-output/specifications/boost-spec.pdf" ]; then
      cp build-output/specifications/boost-spec.pdf ./pages-content/
      echo "✅ Copied PDF documentation"
    fi
    # Copy schema files (for direct access to individual schema files)
    if [ -d "build-output/schema" ]; then
      cp -r build-output/schema ./pages-content/
      echo "✅ Copied schema directory (individual files accessible)"
    else
      echo "❌ Schema directory not found at build-output/schema"
      # Debug: show what's actually in the build output
      echo "📋 Available paths in build-output:"
      find build-output -name "schema" -type d
      exit 1
    fi
    # Create index.html landing page with download options (no auto-redirect).
    # The page is written in three heredoc pieces so the PDF link can be
    # inserted only when the PDF was actually copied.
    cat > ./pages-content/index.html << EOF
    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="utf-8">
      <title>BOOST Data Standard Documentation</title>
      <link rel="canonical" href="./boost-spec.html">
      <style>
        body {
          font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
          max-width: 800px;
          margin: 50px auto;
          padding: 20px;
          line-height: 1.6;
        }
        .hero-section {
          text-align: center;
          margin-bottom: 40px;
          padding: 30px;
          background: #f8f9fa;
          border-radius: 10px;
        }
        .main-button {
          display: inline-block;
          margin: 15px;
          padding: 15px 30px;
          background: #28a745;
          color: white;
          text-decoration: none;
          border-radius: 8px;
          font-size: 18px;
          font-weight: 600;
        }
        .main-button:hover { background: #218838; }
        .download-links { margin: 30px 0; }
        .download-links a {
          display: inline-block;
          margin: 8px 12px 8px 0;
          padding: 10px 20px;
          background: #0066cc;
          color: white;
          text-decoration: none;
          border-radius: 5px;
        }
        .download-links a:hover { background: #0052a3; }
        .description {
          color: #666;
          margin-bottom: 30px;
          font-size: 16px;
        }
      </style>
    </head>
    <body>
      <div class="hero-section">
        <h1>🌱 BOOST Data Standard</h1>
        <p class="description">
          Biomass Origin and Ownership Supply-chain Tracking (BOOST) provides a comprehensive
          data standard for tracking biomass materials through complex supply chains with
          complete traceability and sustainability verification.
        </p>
        <a href="./boost-spec.html" class="main-button">📖 View Documentation Online</a>
      </div>
      <div class="download-links">
        <h2>📥 Download & Resources</h2>
    EOF
    # Add PDF link if PDF exists
    if [ -f "./pages-content/boost-spec.pdf" ]; then
    cat >> ./pages-content/index.html << EOF
        <a href="./boost-spec.pdf">📄 Download PDF</a>
    EOF
    fi
    # Add other download links
    cat >> ./pages-content/index.html << EOF
        <a href="./erd-navigator/">🔍 ERD Navigator</a>
        <a href="https://github.com/carbondirect/BOOST/tree/main/drafts/current/schema">📋 JSON Schemas</a>
      </div>
      <p><em>Choose how you'd like to access the BOOST Data Standard documentation above.</em></p>
    </body>
    </html>
    EOF
    # Add version info
    echo "${{ needs.extract-version.outputs.version }}" > ./pages-content/VERSION
    echo "${{ needs.extract-version.outputs.build-timestamp }}" > ./pages-content/BUILD_TIME
# Configure Pages, upload ./pages-content as the Pages artifact, deploy it,
# and print the resulting URLs.
- name: Setup Pages
  uses: actions/configure-pages@v4

- name: Upload to GitHub Pages
  uses: actions/upload-pages-artifact@v3
  with:
    path: ./pages-content

# The step id `deployment` is what the job's environment URL and the status
# step below read `page_url` from.
- name: Deploy to GitHub Pages
  id: deployment
  uses: actions/deploy-pages@v4

- name: Update deployment status
  run: |
    echo "🌐 Documentation deployed to: ${{ steps.deployment.outputs.page_url }}"
    echo "📚 BOOST Specification: ${{ steps.deployment.outputs.page_url }}boost-spec.html"
    echo "🔍 ERD Navigator: ${{ steps.deployment.outputs.page_url }}erd-navigator/"