Deploy MCP Production #3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Deploy MCP Production | |
| # Manual promotion of the MCP Cloudflare Worker from staging | |
| # (mcp-staging.mcpjam.com) to production (mcp.mcpjam.com). Intentionally | |
| # workflow_dispatch-only — there's no auto-deploy-on-merge for prod, | |
| # matching release.yml's philosophy that production is a deliberate last | |
| # step, not a side effect of merging. | |
| # | |
| # Invocation paths: | |
| # 1. Soundcheck's "Deploy MCP production" tile (POST /api/mcp/dispatch). | |
| # 2. GitHub Actions UI → "Deploy MCP Production" → Run workflow. | |
| # | |
| # Reviewer gating lives on the `mcp-production` GitHub Environment in repo | |
| # Settings → Environments, not in this file. Keeping the gate in the | |
| # environment config means it applies uniformly to both invocation paths. | |
| on: | |
| workflow_dispatch: | |
| permissions: | |
| contents: read | |
| actions: read | |
| deployments: write | |
| # Never cancel an in-flight production deploy just because a second | |
| # dispatch arrived. Mid-deploy cancellation can leave the worker in a | |
| # partially-provisioned state (cert attached, bindings not). Queue instead. | |
| concurrency: | |
| group: mcp-production | |
| cancel-in-progress: false | |
| env: | |
| NODE_VERSION: "24.14.0" | |
| # Resolved once here so the environment URL, smoke test, and job | |
| # summary all point at the same target. When the override var is | |
| # unset, this falls back to the known production hostname. That | |
| # fallback is hardcoded for the same reason deploy-mcp-staging.yml | |
| # hardcodes its URL: cloudflare/wrangler-action@v3 returns a malformed | |
| # `deployment-url` output for custom-domain deploys (wrangler-action#396). | |
| PRODUCTION_URL: ${{ vars.MCP_WORKER_PRODUCTION_URL || 'https://mcp.mcpjam.com' }} | |
| jobs: | |
| deploy: | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 30 | |
| environment: | |
| name: mcp-production | |
| url: ${{ env.PRODUCTION_URL }} | |
| steps: | |
| - name: Enforce main branch | |
| run: | | |
# Production may only be promoted from the main branch. Compare the fully
# qualified ref instead of GITHUB_REF_NAME: the short name is also "main"
# when the workflow is dispatched from a tag named "main", which would
# otherwise slip past this guard.
if [ "${GITHUB_REF}" != "refs/heads/main" ]; then
  echo "deploy-mcp-prod.yml must run from main" >&2
  exit 1
fi
| - name: Require Cloudflare credentials | |
| env: | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| run: | | |
# Fail fast with a message naming the missing secret, rather than letting
# a later step fail on an empty credential. The account ID is checked
# first, then the API token.
[ -n "$CLOUDFLARE_ACCOUNT_ID" ] || {
  echo "Missing GitHub Actions secret: CLOUDFLARE_ACCOUNT_ID" >&2
  exit 1
}

[ -n "$CLOUDFLARE_API_TOKEN" ] || {
  echo "Missing GitHub Actions secret: CLOUDFLARE_API_TOKEN" >&2
  exit 1
}
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| # Refuse to promote a SHA whose MCP build inputs haven't been | |
| # verified on staging. The check is looser than an exact-SHA match | |
| # because `deploy-mcp-staging.yml` only fires on mcp-relevant path | |
| # changes — so most main commits legitimately never trigger a | |
| # staging run for their SHA, and an exact-SHA gate would lock | |
| # promotion out whenever main moved for unrelated reasons. | |
| # | |
| # Logic: find the latest successful staging run on main. If its | |
| # SHA matches current main, done. Otherwise, compare the two SHAs | |
| # and verify no mcp-relevant files changed between them — meaning | |
| # what's live on staging is byte-identical (for the worker's | |
| # purposes) to what we'd ship. The mcp-relevant path list mirrors | |
| # deploy-mcp-staging.yml's `paths:` trigger filter, minus the | |
| # workflow file itself (which isn't a build input). | |
| - name: Require green staging for current MCP build inputs | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
// Staging gate for production promotion: mark the step failed unless the
// MCP build inputs at the dispatched SHA are the ones the latest successful
// staging deploy shipped. `github`, `context`, and `core` are the globals
// injected by actions/github-script.
const workflowId = "deploy-mcp-staging.yml";

// GitHub's compare response caps `files` at 300 entries. A list at the cap
// may be truncated, so it cannot prove that no MCP build input changed.
const COMPARE_FILES_CAP = 300;

/** True when a changed path is an input to the MCP worker build. */
const isMcpRelevant = (filename) =>
  filename.startsWith("mcp/") ||
  filename === "package.json" ||
  filename === "package-lock.json" ||
  filename === ".changeset/config.json";

/** Abbreviates a commit SHA to 7 characters for log and error messages. */
const shortSha = (sha) => sha.slice(0, 7);

/**
 * Runs the gate and reports the verdict through `core`.
 *
 * Passes when the latest successful staging run on main is at the current
 * SHA, or when the diff between that run's SHA and the current SHA touches
 * no MCP-relevant path. Calls `core.setFailed` when there is no successful
 * staging run, when MCP-relevant files changed since it, or when the diff
 * is too large for the changed-file list to be trusted.
 *
 * @returns {Promise<void>} Resolves once the verdict has been reported;
 *   rejects if a GitHub API request fails.
 */
async function requireGreenStaging() {
  const { owner, repo } = context.repo;
  const headSha = context.sha;

  // Ask the API for successful runs only and take the first one, instead of
  // paginating through every completed run on main to find one success.
  // Like the search over the full list that this replaces, it relies on
  // the API listing runs newest-first.
  const { data: runsPage } = await github.rest.actions.listWorkflowRuns({
    owner,
    repo,
    workflow_id: workflowId,
    branch: "main",
    status: "success",
    per_page: 1,
  });
  const latestSuccess = runsPage.workflow_runs.find(
    (run) => run.conclusion === "success"
  );

  if (!latestSuccess) {
    core.setFailed(`No successful ${workflowId} run on main. Cannot promote.`);
    return;
  }

  if (latestSuccess.head_sha === headSha) {
    core.info(
      `Latest successful staging run ${latestSuccess.id} is at ${headSha} — safe to promote.`
    );
    return;
  }

  // Different SHAs: check whether the diff includes any MCP build input.
  const { data: diff } = await github.rest.repos.compareCommitsWithBasehead({
    owner,
    repo,
    basehead: `${latestSuccess.head_sha}...${headSha}`,
  });
  const changedFiles = diff.files ?? [];
  const touched = changedFiles.map((f) => f.filename).filter(isMcpRelevant);

  if (touched.length > 0) {
    core.setFailed(
      `Latest successful staging is at ${shortSha(latestSuccess.head_sha)} ` +
        `but current main ${shortSha(headSha)} has MCP-relevant changes not yet on ` +
        `staging: ${touched.join(", ")}. Wait for the next deploy-mcp-staging.yml run ` +
        `to land before promoting.`
    );
    return;
  }

  // Fail closed on a possibly truncated list: an MCP-relevant change could
  // sit beyond the cap, and silently passing would defeat the gate.
  if (changedFiles.length >= COMPARE_FILES_CAP) {
    core.setFailed(
      `Diff ${shortSha(latestSuccess.head_sha)}...${shortSha(headSha)} lists ` +
        `${changedFiles.length} files, which reaches GitHub's ${COMPARE_FILES_CAP}-file ` +
        `compare cap, so MCP-relevant changes may be hidden. A successful ` +
        `${workflowId} run on a newer main commit is needed before promoting.`
    );
    return;
  }

  core.info(
    `Latest successful staging run ${latestSuccess.id} is at ` +
      `${shortSha(latestSuccess.head_sha)}; current main ${shortSha(headSha)} ` +
      `differs only in non-MCP paths — safe to promote.`
  );
}

await requireGreenStaging();
| - name: Setup Node.js | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: ${{ env.NODE_VERSION }} | |
| cache: npm | |
| - name: Install workspace dependencies | |
| run: npm ci --legacy-peer-deps | |
| - name: Typecheck MCP worker | |
| run: npm run typecheck -w @mcpjam/mcp | |
| - name: Deploy production worker | |
| id: deploy | |
| uses: cloudflare/wrangler-action@v3 | |
| with: | |
| apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| gitHubToken: ${{ secrets.GITHUB_TOKEN }} | |
| workingDirectory: "mcp" | |
| command: deploy --env production | |
| - name: Smoke test landing page | |
| run: | | |
# The first deploy onto a new custom domain can take up to ~60s while
# Cloudflare provisions the edge cert and propagates the hostname, so
# the landing page is polled instead of checked once. Deploys onto an
# existing hostname land almost immediately, but the loop is cheap.
#
# Every probe is bounded (--connect-timeout 3 --max-time 6) so that a
# single hung TCP/TLS handshake cannot exceed the retry budget and keep
# holding the mcp-production concurrency slot.
max_attempts=20

# Succeeds only when the page is fetched without error and contains the
# expected marker text.
probe_landing_page() {
  curl --fail --silent --show-error --connect-timeout 3 --max-time 6 "$PRODUCTION_URL" | grep -q "MCPJam MCP"
}

attempt=1
while [ "$attempt" -le "$max_attempts" ]; do
  if probe_landing_page; then
    echo "Smoke OK on attempt $attempt"
    exit 0
  fi
  echo "Attempt $attempt returned non-200 or mismatched content; retrying in 3s…"
  sleep 3
  attempt=$((attempt + 1))
done

echo "Production URL never resolved correctly." >&2
exit 1
| - name: Summarize production deployment | |
| run: | | |
# Append the deployment summary to the job summary page. The lines are
# grouped so the summary file is opened for appending once.
{
  echo "## ✅ MCP production deployed"
  echo ""
  echo "- Worker: \`mcpjam-mcp-production\`"
  echo "- URL: \`$PRODUCTION_URL\`"
  echo "- MCP endpoint: \`$PRODUCTION_URL/mcp\`"
} >> "$GITHUB_STEP_SUMMARY"