Skip to content

fix: Resolve Docker container version detection for consistent docume… #26

fix: Resolve Docker container version detection for consistent docume…

fix: Resolve Docker container version detection for consistent docume… #26

Workflow file for this run

name: 🚀 Build & Deploy Documentation

# Triggers: pushes to main that touch the specification sources, the schemas
# or the workflow definitions, plus on-demand runs started by hand.
on:
  push:
    branches: [main]  # Full build + deploy
    paths:
      - "drafts/current/specifications/**"
      - "drafts/current/schema/**"
      - ".github/workflows/**"
  # Allow manual triggering
  workflow_dispatch:
    inputs:
      deploy:
        description: "Deploy to GitHub Pages (main branch only)"
        required: false
        type: boolean
        default: false

# One active run per ref: starting a newer run cancels the one in flight.
concurrency:
  group: build-deploy-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide environment. The version-extraction script reads
# BOOST_DOC_VERSION when the run was started manually.
env:
  BOOST_DOC_VERSION: ${{ github.ref_name }}
jobs:
extract-version:
  # Resolves the documentation version string and a UTC build timestamp.
  # This job declares no `container:`, so git runs directly on the runner.
  # Both values are published as job outputs: `version` and `build-timestamp`.
  name: 🏷️ Extract Version Info
  runs-on: ubuntu-latest
  timeout-minutes: 5
  outputs:
    version: ${{ steps.version-info.outputs.version }}
    build-timestamp: ${{ steps.version-info.outputs.timestamp }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Need full git history for version extraction

    - name: Extract version information
      id: version-info
      run: |
        echo "🔍 Extracting version information (outside Docker container)..."

        # The ref type, ref name and event name are taken from the runner's
        # default GITHUB_* environment variables instead of being pasted into
        # the script text, so a ref name can never be parsed as shell code.

        # Determine version based on trigger type and git history
        if [ "$GITHUB_REF_TYPE" = "tag" ]; then
          # Tagged release - use exact tag
          VERSION="$GITHUB_REF_NAME"
          echo "📌 Using release tag: $VERSION"
        elif [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ] \
          && [ -n "$BOOST_DOC_VERSION" ] \
          && [ "$BOOST_DOC_VERSION" != "main" ]; then
          # Manual dispatch with specific version
          VERSION="$BOOST_DOC_VERSION"
          echo "🎯 Using manual version: $VERSION"
        else
          # Development build - use git describe for detailed version
          if git describe --tags >/dev/null 2>&1; then
            VERSION=$(git describe --tags --always)
            echo "🔧 Using development version: $VERSION"
          else
            # Fallback for repos without tags
            SHORT_SHA=$(git rev-parse --short HEAD)
            VERSION="v0.0.0-${SHORT_SHA}"
            echo "⚠️ No tags found, using commit: $VERSION"
          fi
        fi

        TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

        # Store outputs (the path is quoted so it survives unusual characters)
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "timestamp=$TIMESTAMP" >> "$GITHUB_OUTPUT"
        echo "::notice title=Version Detected::$VERSION (detected outside Docker container)"
build-documentation:
  # Builds the HTML and PDF specification inside the boost-builder container,
  # validates the generated files, writes a build report and uploads the
  # results as one artifact named after the version and the ref.
  name: 📚 Build Documentation
  runs-on: ubuntu-latest
  timeout-minutes: 15  # Prevent stuck builds - Docker should be ~2-3 min
  needs: extract-version
  container:
    image: ghcr.io/carbondirect/boost/boost-builder:latest
    options: --pull=missing
    # NOTE(review): the nesting of `env` was lost in the flattened source; it
    # is reconstructed here under `container:` - confirm against the repo.
    env:
      RELEASE_VERSION: ${{ needs.extract-version.outputs.version }}
  outputs:
    version: ${{ needs.extract-version.outputs.version }}
    # The upstream job names this output `build-timestamp`; it has no output
    # called `timestamp`, so the old reference always evaluated to empty.
    build-timestamp: ${{ needs.extract-version.outputs.build-timestamp }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Still need for schema validation scripts

    - name: Verify containerized environment
      run: |
        echo "🐳 Docker containerized build environment"
        echo "Python version: $(python3 --version)"
        echo "Bikeshed version: $(bikeshed --version)"
        echo "TeXLive available: $(which pdflatex && echo 'Yes' || echo 'No')"
        echo "Pandoc version: $(pandoc --version | head -1)"

    - name: Confirm version information
      id: confirm-version
      # Job outputs reach the script through the environment rather than
      # being substituted into the script text.
      env:
        EXPECTED_VERSION: ${{ needs.extract-version.outputs.version }}
        BUILD_TIMESTAMP: ${{ needs.extract-version.outputs.build-timestamp }}
      run: |
        # Use pre-extracted version from previous job
        VERSION="$EXPECTED_VERSION"
        TIMESTAMP="$BUILD_TIMESTAMP"
        echo "🏷️ Using pre-extracted version: $VERSION"
        echo "⏰ Build timestamp: $TIMESTAMP"
        echo "🐳 Container environment variable RELEASE_VERSION: $RELEASE_VERSION"
        # Verify the environment variable is set correctly
        if [ "$RELEASE_VERSION" != "$VERSION" ]; then
          echo "⚠️ Warning: Environment variable mismatch"
          echo " Pre-extracted: $VERSION"
          echo " Container env: $RELEASE_VERSION"
        else
          echo "✅ Version successfully passed to Docker container"
        fi
        echo "::notice title=Build Version::Building BOOST Documentation $VERSION"

    - name: Run comprehensive schema validation
      working-directory: drafts/current/specifications
      run: |
        echo "🔍 Running comprehensive schema validation..."
        python3 ../../../.github/scripts/validate-schemas.py

    - name: Build HTML documentation
      working-directory: drafts/current/specifications
      run: |
        echo "🏗️ Building HTML documentation..."
        echo "📋 Branch: $GITHUB_REF_NAME"
        echo "🎯 Build type: ${{ github.ref == 'refs/heads/main' && 'Production (with deployment)' || 'Development (build only)' }}"
        echo "🔧 Using version: $RELEASE_VERSION"
        echo "📦 Version source: Pre-extracted outside Docker container"
        chmod +x build-spec.sh
        ./build-spec.sh

    - name: Validate build output
      working-directory: drafts/current/specifications
      run: |
        # Comprehensive validation of build output
        echo "🔍 Validating build output..."

        # Check HTML generation
        if [ ! -f "boost-spec.html" ]; then
          echo "❌ boost-spec.html was not generated"
          exit 1
        fi
        HTML_SIZE=$(wc -c < boost-spec.html)
        echo "📊 Generated HTML size: $(echo $HTML_SIZE | numfmt --to=iec-i --suffix=B)"
        if [ "$HTML_SIZE" -lt 200000 ]; then
          echo "❌ Generated HTML file is too small ($HTML_SIZE bytes)"
          exit 1
        fi

        # Validate HTML content
        if ! grep -q "BOOST" boost-spec.html; then
          echo "❌ Generated HTML missing BOOST content"
          exit 1
        fi

        # Check for ReSpec styling
        if ! grep -q "main-content-wrapper" boost-spec.html; then
          echo "❌ ReSpec styling not applied correctly"
          exit 1
        fi

        # Validate ERD Navigator
        if [ ! -f "erd-navigator/index.html" ]; then
          echo "❌ ERD Navigator missing"
          exit 1
        fi
        if [ ! -f "erd-navigator/erd-config.json" ]; then
          echo "❌ ERD Navigator configuration missing"
          exit 1
        fi

        # Check schema count (a low count only warns, it does not fail)
        SCHEMA_COUNT=$(find ../schema -name "validation_schema.json" | wc -l)
        echo "📊 Total schemas: $SCHEMA_COUNT"
        if [ "$SCHEMA_COUNT" -lt 30 ]; then
          echo "⚠️ Schema count lower than expected ($SCHEMA_COUNT)"
        fi
        echo "✅ Build output validation passed"

    - name: Build PDF documentation
      # Still runs after an earlier step has failed, but - unlike always() -
      # stops once the run is cancelled, so cancel-in-progress takes effect.
      if: ${{ !cancelled() }}
      working-directory: drafts/current/specifications
      run: |
        echo "📄 Building PDF documentation for $GITHUB_REF_NAME branch..."

        # Determine PDF filename based on branch
        if [ "$GITHUB_REF_NAME" = "main" ]; then
          PDF_FILENAME="boost-spec.pdf"
          PDF_TITLE="BOOST Data Standard"
        else
          PDF_FILENAME="boost-spec-dev-${GITHUB_REF_NAME}.pdf"
          PDF_TITLE="BOOST Data Standard (Dev: ${GITHUB_REF_NAME})"
        fi
        echo "📄 Target PDF: $PDF_FILENAME"

        # Try LaTeX build first; a failed pass is logged and does not abort
        if [ -f "boost-spec.tex" ]; then
          echo "🔧 Building PDF from LaTeX..."
          echo "📄 Running first LaTeX pass..."
          pdflatex -shell-escape -interaction=nonstopmode boost-spec.tex || echo "LaTeX first pass failed"
          echo "📄 Running second LaTeX pass (for TOC, LOF, LOT)..."
          pdflatex -shell-escape -interaction=nonstopmode boost-spec.tex || echo "LaTeX second pass failed"
          echo "📄 Running third LaTeX pass (for cross-references)..."
          pdflatex -shell-escape -interaction=nonstopmode boost-spec.tex || echo "LaTeX third pass failed, trying Pandoc..."
          # Rename if LaTeX generated boost-spec.pdf but we need a different name
          if [ -f "boost-spec.pdf" ] && [ "$PDF_FILENAME" != "boost-spec.pdf" ]; then
            mv boost-spec.pdf "$PDF_FILENAME"
          fi
        fi

        # Fallback to Pandoc HTML->PDF conversion
        if [ ! -f "$PDF_FILENAME" ] && [ -f "boost-spec.html" ]; then
          echo "🔄 Converting HTML to PDF with Pandoc..."
          # Create clean HTML for PDF conversion. The Python source must stay
          # flush with the left edge of the script: leading whitespace on its
          # first statement would be an IndentationError.
          python3 -c "
        import re
        with open('boost-spec.html', 'r') as f:
            html = f.read()
        # Remove interactive elements and styling that don't work in PDF
        html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL)
        html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL)
        html = re.sub(r'onclick=\"[^\"]*\"', '', html)
        html = re.sub(r'class=\"[^\"]*\"', '', html)
        html = re.sub(r'id=\"toc\"', '', html) # Remove TOC for cleaner PDF
        with open('boost-spec-clean.html', 'w') as f:
            f.write(html)
        "
          # Generate PDF with Pandoc
          pandoc boost-spec-clean.html \
            -o "$PDF_FILENAME" \
            --pdf-engine=xelatex \
            --from=html \
            --to=pdf \
            --metadata title="$PDF_TITLE" \
            --metadata author="BOOST Consortium" \
            --metadata date="$(date +%Y-%m-%d)" \
            --toc \
            --toc-depth=3 \
            --number-sections \
            || echo "⚠️ PDF generation failed"
          rm -f boost-spec-clean.html
        fi

        # Check final result (a missing PDF is reported, not fatal)
        if [ -f "$PDF_FILENAME" ]; then
          PDF_SIZE=$(wc -c < "$PDF_FILENAME" | numfmt --to=iec-i --suffix=B)
          echo "✅ PDF generated: $PDF_FILENAME ($PDF_SIZE)"
        else
          echo "❌ PDF generation failed for $PDF_FILENAME"
        fi

    - name: Run documentation consistency validation
      # Same reasoning as the PDF step: tolerate failures, honour cancellation.
      if: ${{ !cancelled() }}
      working-directory: drafts/current/specifications
      run: |
        echo "🔍 Running HTML/PDF consistency validation..."
        if [ -f "scripts/validate-consistency.py" ]; then
          python3 scripts/validate-consistency.py --strict || {
            echo "⚠️ Consistency validation found issues"
            echo "📊 Check build/consistency-report.json for details"
            # Don't fail the build, just warn
            exit 0
          }
          echo "✅ HTML and PDF documentation are consistent"
        else
          echo "⚠️ Consistency validation script not found"
        fi

    - name: Generate build report
      working-directory: drafts/current/specifications
      # bash is requested explicitly because the report uses the bash-only
      # substring expansion on GITHUB_SHA.
      shell: bash
      # Version and timestamp come from the extract-version JOB (the `needs`
      # context). No step in this job has the id `extract-version`, so the
      # former `steps.` lookups rendered as empty strings in the report.
      env:
        DOC_VERSION: ${{ needs.extract-version.outputs.version }}
        BUILD_TIMESTAMP: ${{ needs.extract-version.outputs.build-timestamp }}
      run: |
        echo "📋 Generating build report..."
        cat > build-report.md << EOF
        # BOOST Documentation Build Report
        **Version:** ${DOC_VERSION}
        **Build Time:** ${BUILD_TIMESTAMP}
        **Commit:** ${GITHUB_SHA::8}
        **Branch:** ${GITHUB_REF_NAME}
        **Build Type:** ${{ github.ref == 'refs/heads/main' && 'Production (with deployment)' || 'Development (build only)' }}
        ## Build Statistics
        - **HTML Size:** $(wc -c < boost-spec.html | numfmt --to=iec-i --suffix=B)
        - **Schema Files:** $(find ../schema -name "validation_schema.json" | wc -l)
        - **Dictionary Files:** $(find ../schema -name "*_dictionary.md" | wc -l)
        - **Include Modules:** $(find includes -name "*.md" | wc -l)
        ## Generated Artifacts
        - ✅ HTML Documentation (boost-spec.html)
        - ✅ ERD Navigator (erd-navigator/)
        - ✅ Schema Files (../schema/)
        - ✅ ReSpec Styling Applied
        ## Validation Results
        - ✅ Schema Validation Passed
        - ✅ FK Integrity Validation Passed
        - ✅ HTML Generation Successful
        - ✅ Content Validation Passed
        EOF
        echo "📋 Build Report Generated"
        cat build-report.md

    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: boost-documentation-${{ needs.extract-version.outputs.version }}-${{ github.ref_name }}
        path: |
          drafts/current/specifications/boost-spec.html
          drafts/current/specifications/boost-spec-dev-*.pdf
          drafts/current/specifications/boost-spec.pdf
          drafts/current/specifications/build-report.md
          drafts/current/specifications/erd-navigator/
          drafts/current/schema/
        # Artifacts built from main are kept 30 days, all others 7
        retention-days: ${{ github.ref == 'refs/heads/main' && 30 || 7 }}
deploy-github-pages:
  # Downloads the artifact produced by build-documentation, assembles a
  # static site (spec, ERD navigator, schemas, landing page, version files)
  # and publishes it to GitHub Pages.
  name: 🌐 Deploy to GitHub Pages
  runs-on: ubuntu-latest
  needs: [extract-version, build-documentation]
  # Only main is ever deployed. Pushes to main always deploy; a manual run
  # deploys only when its `deploy` input is enabled - the input defaults to
  # false and was previously declared without ever being consulted.
  if: github.ref == 'refs/heads/main' && (github.event_name != 'workflow_dispatch' || inputs.deploy)
  # Grant write permissions for Pages deployment
  permissions:
    contents: read
    pages: write
    id-token: write
  environment:
    name: github-pages
    url: ${{ steps.deployment.outputs.page_url }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: boost-documentation-${{ needs.extract-version.outputs.version }}-${{ github.ref_name }}
        path: ./build-output

    - name: Prepare Pages deployment
      # Job outputs reach the script through the environment rather than
      # being substituted into the script text.
      env:
        DOC_VERSION: ${{ needs.extract-version.outputs.version }}
        BUILD_TIMESTAMP: ${{ needs.extract-version.outputs.build-timestamp }}
      run: |
        mkdir -p ./pages-content

        # List downloaded artifacts for debugging. The -name alternatives are
        # grouped so that -type f applies to all three patterns, not only
        # the first one.
        echo "📋 Downloaded artifact structure:"
        find build-output -type f \( -name "*.html" -o -name "*.pdf" -o -name "*.md" \) | head -10

        # Copy main HTML documentation (required)
        if [ -f "build-output/specifications/boost-spec.html" ]; then
          cp build-output/specifications/boost-spec.html ./pages-content/
          echo "✅ Copied boost-spec.html"
        else
          echo "❌ boost-spec.html not found at build-output/specifications/boost-spec.html"
          exit 1
        fi

        # Copy ERD navigator if it exists
        if [ -d "build-output/specifications/erd-navigator" ]; then
          cp -r build-output/specifications/erd-navigator ./pages-content/
          echo "✅ Copied ERD navigator"
        fi

        # Copy build report if it exists
        if [ -f "build-output/specifications/build-report.md" ]; then
          cp build-output/specifications/build-report.md ./pages-content/
          echo "✅ Copied build report"
        fi

        # Copy PDF if it exists
        if [ -f "build-output/specifications/boost-spec.pdf" ]; then
          cp build-output/specifications/boost-spec.pdf ./pages-content/
          echo "✅ Copied PDF documentation"
        fi

        # Copy schema files (for direct access to individual schema files)
        if [ -d "build-output/schema" ]; then
          cp -r build-output/schema ./pages-content/
          echo "✅ Copied schema directory (individual files accessible)"
        else
          echo "❌ Schema directory not found at build-output/schema"
          # Debug: show what's actually in the build output
          echo "📋 Available paths in build-output:"
          find build-output -name "schema" -type d
          exit 1
        fi

        # Create index.html landing page with download options (no auto-redirect).
        # The page is written in three heredoc chunks so that the PDF link can
        # be inserted only when a PDF was actually copied.
        cat > ./pages-content/index.html << EOF
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <title>BOOST Data Standard Documentation</title>
        <link rel="canonical" href="./boost-spec.html">
        <style>
        body {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
        max-width: 800px;
        margin: 50px auto;
        padding: 20px;
        line-height: 1.6;
        }
        .hero-section {
        text-align: center;
        margin-bottom: 40px;
        padding: 30px;
        background: #f8f9fa;
        border-radius: 10px;
        }
        .main-button {
        display: inline-block;
        margin: 15px;
        padding: 15px 30px;
        background: #28a745;
        color: white;
        text-decoration: none;
        border-radius: 8px;
        font-size: 18px;
        font-weight: 600;
        }
        .main-button:hover { background: #218838; }
        .download-links { margin: 30px 0; }
        .download-links a {
        display: inline-block;
        margin: 8px 12px 8px 0;
        padding: 10px 20px;
        background: #0066cc;
        color: white;
        text-decoration: none;
        border-radius: 5px;
        }
        .download-links a:hover { background: #0052a3; }
        .description {
        color: #666;
        margin-bottom: 30px;
        font-size: 16px;
        }
        </style>
        </head>
        <body>
        <div class="hero-section">
        <h1>🌱 BOOST Data Standard</h1>
        <p class="description">
        Biomass Origin and Ownership Supply-chain Tracking (BOOST) provides a comprehensive
        data standard for tracking biomass materials through complex supply chains with
        complete traceability and sustainability verification.
        </p>
        <a href="./boost-spec.html" class="main-button">📖 View Documentation Online</a>
        </div>
        <div class="download-links">
        <h2>📥 Download & Resources</h2>
        EOF

        # Add PDF link if PDF exists
        if [ -f "./pages-content/boost-spec.pdf" ]; then
          cat >> ./pages-content/index.html << EOF
        <a href="./boost-spec.pdf">📄 Download PDF</a>
        EOF
        fi

        # Add other download links
        cat >> ./pages-content/index.html << EOF
        <a href="./erd-navigator/">🔍 ERD Navigator</a>
        <a href="https://github.com/carbondirect/BOOST/tree/main/drafts/current/schema">📋 JSON Schemas</a>
        </div>
        <p><em>Choose how you'd like to access the BOOST Data Standard documentation above.</em></p>
        </body>
        </html>
        EOF

        # Add version info
        echo "$DOC_VERSION" > ./pages-content/VERSION
        echo "$BUILD_TIMESTAMP" > ./pages-content/BUILD_TIME

    - name: Setup Pages
      uses: actions/configure-pages@v4

    - name: Upload to GitHub Pages
      uses: actions/upload-pages-artifact@v3
      with:
        path: ./pages-content

    - name: Deploy to GitHub Pages
      id: deployment
      uses: actions/deploy-pages@v4

    - name: Update deployment status
      run: |
        echo "🌐 Documentation deployed to: ${{ steps.deployment.outputs.page_url }}"
        echo "📚 BOOST Specification: ${{ steps.deployment.outputs.page_url }}boost-spec.html"
        echo "🔍 ERD Navigator: ${{ steps.deployment.outputs.page_url }}erd-navigator/"