# Workflow definition for "Deploy MCP Production" (copied from the GitHub Actions page for run #3).

# Display name; this is the entry to pick in the GitHub Actions UI.
name: "Deploy MCP Production"
# Manual promotion of the MCP Cloudflare Worker from staging
# (mcp-staging.mcpjam.com) to production (mcp.mcpjam.com). Intentionally
# workflow_dispatch-only — there's no auto-deploy-on-merge for prod,
# matching release.yml's philosophy that production is a deliberate last
# step, not a side effect of merging.
#
# Invocation paths:
# 1. Soundcheck's "Deploy MCP production" tile (POST /api/mcp/dispatch).
# 2. GitHub Actions UI → "Deploy MCP Production" → Run workflow.
#
# Reviewer gating lives on the `mcp-production` GitHub Environment in repo
# Settings → Environments, not in this file. Keeping the gate in the
# environment config means it applies uniformly to both invocation paths.
# Manual trigger only: the event must be nested under `on`, otherwise the
# workflow declares no trigger at all.
on:
  workflow_dispatch:
# Least-privilege scopes for the job's GITHUB_TOKEN.
permissions:
  # Checkout, plus the compare-commits call in the staging gate.
  contents: read
  # Listing deploy-mcp-staging.yml runs in the staging gate.
  actions: read
  # NOTE(review): presumably for the deployment records the wrangler action
  # writes through `gitHubToken` — confirm.
  deployments: write
# Never cancel an in-flight production deploy just because a second
# dispatch arrived. Mid-deploy cancellation can leave the worker in a
# partially-provisioned state (cert attached, bindings not). Queue instead.
concurrency:
  group: mcp-production
  cancel-in-progress: false
# Workflow-level environment shared by every step.
env:
  # Quoted so the version stays a string rather than being parsed as a number.
  NODE_VERSION: "24.14.0"
  # Resolved once here so the environment URL, smoke test, and job
  # summary all point at the same target. Falls back to the known
  # production hostname when the override var is unset — hardcoded in
  # the fallback for the same reason deploy-mcp-staging.yml hardcodes
  # its URL: cloudflare/wrangler-action@v3 returns a malformed
  # `deployment-url` output for custom-domain deploys (wrangler-action#396).
  PRODUCTION_URL: ${{ vars.MCP_WORKER_PRODUCTION_URL || 'https://mcp.mcpjam.com' }}
jobs:
  # Single job: verify branch, credentials, and staging state, then
  # typecheck, deploy, smoke-test, and summarize the production worker.
  deploy:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    environment:
      name: mcp-production
      url: ${{ env.PRODUCTION_URL }}
    steps:
      # Compare the full ref rather than GITHUB_REF_NAME: the short name is
      # also "main" for a tag called `main`, and a manual dispatch can be
      # started from a tag.
      - name: Enforce main branch
        run: |
          if [ "${GITHUB_REF}" != "refs/heads/main" ]; then
            echo "deploy-mcp-prod.yml must run from main" >&2
            exit 1
          fi

      # Fail early, with a message naming the missing secret, instead of
      # letting the deploy step fail later.
      - name: Require Cloudflare credentials
        env:
          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
        run: |
          if [ -z "$CLOUDFLARE_ACCOUNT_ID" ]; then
            echo "Missing GitHub Actions secret: CLOUDFLARE_ACCOUNT_ID" >&2
            exit 1
          fi
          if [ -z "$CLOUDFLARE_API_TOKEN" ]; then
            echo "Missing GitHub Actions secret: CLOUDFLARE_API_TOKEN" >&2
            exit 1
          fi

      # No later step in this job runs git against the remote, so don't
      # leave the job token in .git/config where `npm ci` lifecycle
      # scripts could read it.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          persist-credentials: false

      # Refuse to promote a SHA whose MCP build inputs haven't been
      # verified on staging. The check is looser than an exact-SHA match
      # because `deploy-mcp-staging.yml` only fires on mcp-relevant path
      # changes — so most main commits legitimately never trigger a
      # staging run for their SHA, and an exact-SHA gate would lock
      # promotion out whenever main moved for unrelated reasons.
      #
      # Logic: find the latest successful staging run on main. If its
      # SHA matches current main, done. Otherwise, compare the two SHAs
      # and verify no mcp-relevant files changed between them — meaning
      # what's live on staging is byte-identical (for the worker's
      # purposes) to what we'd ship. The mcp-relevant path list mirrors
      # deploy-mcp-staging.yml's `paths:` trigger filter, minus the
      # workflow file itself (which isn't a build input).
      #
      # The gate fails closed whenever the comparison cannot prove that:
      # head not strictly ahead of the staging SHA, or a file list that
      # hit the API's size cap.
      - name: Require green staging for current MCP build inputs
        uses: actions/github-script@v7
        with:
          script: |
            const workflowId = "deploy-mcp-staging.yml";
            const { owner, repo } = context.repo;
            const headSha = context.sha;

            // Ask the API for the newest successful run directly rather
            // than paginating through every completed run to find it.
            const { data: successes } = await github.rest.actions.listWorkflowRuns({
              owner,
              repo,
              workflow_id: workflowId,
              branch: "main",
              status: "success",
              per_page: 1,
            });
            const latestSuccess = successes.workflow_runs[0];
            if (!latestSuccess) {
              core.setFailed(
                `No successful ${workflowId} run on main. Cannot promote.`
              );
              return;
            }
            if (latestSuccess.head_sha === headSha) {
              core.info(
                `Latest successful staging run ${latestSuccess.id} is at ${headSha} — safe to promote.`
              );
              return;
            }

            // Different SHAs: check if the diff includes any MCP build input.
            const { data: diff } = await github.rest.repos.compareCommitsWithBasehead({
              owner,
              repo,
              basehead: `${latestSuccess.head_sha}...${headSha}`,
            });

            // The file list only covers merge-base → head. Unless head is
            // strictly ahead of the staging SHA, an empty list does not
            // mean "same build inputs" (e.g. head is older than what
            // staging verified), so refuse to promote.
            if (diff.status !== "ahead") {
              core.setFailed(
                `Current main ${headSha.slice(0, 7)} is "${diff.status}" relative to the latest ` +
                `successful staging run at ${latestSuccess.head_sha.slice(0, 7)}; cannot verify ` +
                `MCP build inputs. Re-dispatch from the tip of main.`
              );
              return;
            }

            // GitHub's compare response caps `files` at 300 and truncates
            // silently, so a full list may be hiding MCP changes.
            const COMPARE_FILE_CAP = 300;
            const files = diff.files ?? [];
            if (files.length >= COMPARE_FILE_CAP) {
              core.setFailed(
                `Diff between staging ${latestSuccess.head_sha.slice(0, 7)} and main ` +
                `${headSha.slice(0, 7)} lists ${files.length} files, at or above GitHub's ` +
                `${COMPARE_FILE_CAP}-file cap, so it may be truncated and cannot be verified. ` +
                `Land a fresh successful ${workflowId} run on main before promoting.`
              );
              return;
            }

            const isMcpRelevant = (filename) =>
              filename.startsWith("mcp/") ||
              filename === "package.json" ||
              filename === "package-lock.json" ||
              filename === ".changeset/config.json";

            // Check a renamed file's old path too: moving a file out of
            // mcp/ changes the build inputs even though its new name is
            // not MCP-relevant.
            const touched = [
              ...new Set(
                files
                  .flatMap((f) => [f.filename, f.previous_filename])
                  .filter((name) => name && isMcpRelevant(name))
              ),
            ];
            if (touched.length > 0) {
              core.setFailed(
                `Latest successful staging is at ${latestSuccess.head_sha.slice(0, 7)} ` +
                `but current main ${headSha.slice(0, 7)} has MCP-relevant changes not yet on ` +
                `staging: ${touched.join(", ")}. Wait for the next deploy-mcp-staging.yml run ` +
                `to land before promoting.`
              );
              return;
            }
            core.info(
              `Latest successful staging run ${latestSuccess.id} is at ` +
              `${latestSuccess.head_sha.slice(0, 7)}; current main ${headSha.slice(0, 7)} ` +
              `differs only in non-MCP paths — safe to promote.`
            );

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: npm

      - name: Install workspace dependencies
        run: npm ci --legacy-peer-deps

      # Last build-level check before anything is shipped.
      - name: Typecheck MCP worker
        run: npm run typecheck -w @mcpjam/mcp

      # Runs `wrangler deploy --env production` from the mcp/ directory.
      - name: Deploy production worker
        id: deploy
        uses: cloudflare/wrangler-action@v3
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
          workingDirectory: "mcp"
          command: deploy --env production

      - name: Smoke test landing page
        run: |
          # First deploy onto a new custom domain takes up to ~60s for
          # Cloudflare to provision the edge cert and propagate the
          # hostname. Each probe is bounded (--connect-timeout 3
          # --max-time 6) so one hung TCP/TLS handshake can't blow past
          # the retry budget and hold the mcp-production concurrency
          # slot. Later deploys onto the existing hostname land
          # near-instant but the retry loop is cheap.
          for attempt in $(seq 1 20); do
            if curl --fail --silent --show-error --connect-timeout 3 --max-time 6 "$PRODUCTION_URL" | grep -q "MCPJam MCP"; then
              echo "Smoke OK on attempt $attempt"
              exit 0
            fi
            echo "Attempt $attempt returned non-200 or mismatched content; retrying in 3s…"
            sleep 3
          done
          echo "Production URL never resolved correctly." >&2
          exit 1

      # Only reached when the smoke test passed; writes the job summary.
      - name: Summarize production deployment
        run: |
          echo "## ✅ MCP production deployed" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "- Worker: \`mcpjam-mcp-production\`" >> "$GITHUB_STEP_SUMMARY"
          echo "- URL: \`$PRODUCTION_URL\`" >> "$GITHUB_STEP_SUMMARY"
          echo "- MCP endpoint: \`$PRODUCTION_URL/mcp\`" >> "$GITHUB_STEP_SUMMARY"