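# Sync Upstream
# NOTE(review): this text appears to be copied from the GitHub web view of
# "Sync Upstream #70"; page navigation text was removed and the original
# YAML indentation was lost in the copy.
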
name: Sync Upstream

# Triggers: a nightly schedule plus a manual run with optional branch overrides.
on:
  schedule:
    # Every day at 00:00 (GitHub evaluates cron expressions in UTC).
    - cron: "0 0 * * *"
  workflow_dispatch:
    inputs:
      # Branch in this repository that receives the upstream changes.
      downstream_branch:
        description: 'Downstream branch to update (destination)'
        required: false
        default: main
      # Branch in the upstream repository that changes are pulled from.
      upstream_branch:
        description: 'Upstream branch to sync from (source)'
        required: false
        default: main
# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # NOTE(review): the sync branch is pushed with the PAT used at checkout, so
  # confirm whether `contents: write` is still needed for the token itself.
  contents: write
  # The PR step runs `gh pr create` / `gh pr edit` with github.token.
  pull-requests: write
  # Presumably required for attaching the label to the PR -- TODO confirm.
  issues: write
  # Presumably required by the reusable Snyk workflow to upload SARIF results
  # (it receives a `sarif_category`) -- TODO confirm.
  security-events: write
jobs:
  # Merges the upstream branch into a sync branch and opens/updates a PR for it.
  sync:
    runs-on: ubuntu-latest
    # Shared by every step of this job. Scheduled runs carry no inputs, so the
    # `|| 'main'` fallbacks supply the branch names in that case.
    env:
      LABEL: automated-rebase
      UPSTREAM_BRANCH: ${{ inputs.upstream_branch || 'main' }}
      DOWNSTREAM_BRANCH: ${{ inputs.downstream_branch || 'main' }}
      HEAD_BRANCH: sync/upstream-${{ inputs.upstream_branch || 'main' }}
    # Read by the snyk-scan job, which runs as a separate job and therefore
    # cannot see this job's environment or step outputs directly.
    outputs:
      head_branch: ${{ env.HEAD_BRANCH }}
      has_changes: ${{ steps.diffcheck.outputs.has_changes }}
    steps:
- name: Checkout downstream repository
  uses: actions/checkout@v4
  with:
    # fetch-depth 0 fetches the complete history instead of a shallow clone;
    # the later merge with upstream needs the shared history.
    fetch-depth: 0
    ref: ${{ env.DOWNSTREAM_BRANCH }}
    # The PAT stays configured in the local repository (persist-credentials),
    # so the `git push` in a later step authenticates with it.
    token: ${{ secrets.REPO_WORKFLOW_PAT }}
    persist-credentials: true
- name: Set up Git
  run: |
    # Author identity for the commits created by the following steps.
    git config user.email "github-actions[bot]@users.noreply.github.com"
    git config user.name "github-actions[bot]"
- name: Add upstream remote and fetch
  run: |
    UPSTREAM_URL="https://github.com/kubeflow/sdk"
    # `remote add` fails when the remote already exists; in that case just
    # point the existing remote at the expected URL.
    git remote add upstream "${UPSTREAM_URL}" || git remote set-url upstream "${UPSTREAM_URL}"
    # Refresh both sides of the merge.
    git fetch upstream "${UPSTREAM_BRANCH}"
    git fetch origin "${DOWNSTREAM_BRANCH}"
- name: Create sync branch from downstream
  run: |
    # -B creates the branch, or resets it when it already exists, so each run
    # starts from the current downstream tip and the midstream commits stay
    # underneath the merge.
    git checkout -B "${HEAD_BRANCH}" "origin/${DOWNSTREAM_BRANCH}"
- name: Merge upstream and handle excluded paths
  id: merge_step
  run: |
    # Merges upstream/${UPSTREAM_BRANCH} into the sync branch while keeping the
    # midstream-owned paths (EXCLUDED_PATHS) exactly as they are on
    # origin/${DOWNSTREAM_BRANCH}.
    #
    # Step output:
    #   has_conflict=true   conflict markers were committed; manual resolution needed
    #   has_conflict=false  merge committed cleanly, or there was nothing to merge
    set -euo pipefail

    EXCLUDED_PATHS=(".github" "CHANGELOG" "OWNERS")

    # Succeeds when $1 is an excluded path itself or lives underneath one.
    # The match stops at a path boundary, so e.g. "OWNERS_ALIASES" is not
    # treated as part of "OWNERS".
    is_excluded() {
      local file="$1" excluded
      for excluded in "${EXCLUDED_PATHS[@]}"; do
        if [[ "$file" == "$excluded" || "$file" == "$excluded"/* ]]; then
          return 0
        fi
      done
      return 1
    }

    # Makes every excluded path identical to origin/${DOWNSTREAM_BRANCH},
    # in both the index and the working tree.
    restore_excluded_paths() {
      local path
      for path in "${EXCLUDED_PATHS[@]}"; do
        if git cat-file -e "origin/${DOWNSTREAM_BRANCH}:${path}" 2>/dev/null; then
          # --no-overlay also removes files that exist only on the upstream
          # side; the default overlay mode never deletes anything, which
          # would leave files newly added by upstream staged in the merge.
          git checkout --no-overlay "origin/${DOWNSTREAM_BRANCH}" -- "${path}" \
            || echo "::warning::Could not restore ${path} from origin/${DOWNSTREAM_BRANCH}"
        elif [ -e "${path}" ]; then
          # Path exists in the merge result but not in downstream - remove it
          git rm -rf "${path}" || true
        fi
      done
    }

    # Try the merge without committing so the result can be adjusted first.
    if git merge --no-commit --no-ff "upstream/${UPSTREAM_BRANCH}"; then
      echo "Clean merge - restoring excluded paths"
      restore_excluded_paths
      # Only commit if something is left after the excluded paths were restored.
      if git diff --cached --quiet && git diff --quiet; then
        echo "No changes to commit (already up to date)"
        # There is no merge in progress when upstream was already merged,
        # hence the tolerated failure.
        git merge --abort || true
      else
        git commit -m "Merge upstream/${UPSTREAM_BRANCH} (excluded paths preserved from midstream)"
      fi
      echo "has_conflict=false" >> "$GITHUB_OUTPUT"
    else
      # The merge stopped: collect the conflicted (unmerged) files.
      mapfile -t CONFLICTS < <(git diff --name-only --diff-filter=U || true)
      if [ ${#CONFLICTS[@]} -eq 0 ]; then
        echo "ERROR: Merge failed but no conflicts listed"
        # Best effort: the merge may have failed before it even started.
        git merge --abort || true
        exit 1
      fi

      # Conflicts inside excluded paths are settled in favour of midstream.
      for file in "${CONFLICTS[@]}"; do
        if is_excluded "$file"; then
          echo "Auto-resolving excluded file: $file (keeping midstream)"
          git checkout --ours "$file" || true
          git add "$file" || true
        else
          echo "Real conflict in: $file"
        fi
      done
      # Upstream changes to excluded paths that merged without conflict are
      # dropped as well, exactly as in the clean-merge case.
      restore_excluded_paths

      # Whatever is still unmerged now lives outside the excluded paths.
      mapfile -t UNRESOLVED < <(git diff --name-only --diff-filter=U || true)
      if [ ${#UNRESOLVED[@]} -eq 0 ]; then
        echo "Conflicts only in excluded paths - keeping midstream version"
        git commit -m "Merge upstream/${UPSTREAM_BRANCH} (excluded path conflicts kept from midstream)"
        echo "has_conflict=false" >> "$GITHUB_OUTPUT"
      else
        echo "Real conflicts detected in non-excluded files"
        echo "Creating conflict PR for manual resolution..."
        echo "Unresolved conflicts remain in:"
        for file in "${UNRESOLVED[@]}"; do
          echo " - $file"
          # Stage the file with its conflict markers so the merge can be
          # committed and resolved later in the PR.
          git add "$file" || true
        done
        git commit -m "Merge upstream/${UPSTREAM_BRANCH} (HAS CONFLICTS - manual resolution needed)"
        echo "has_conflict=true" >> "$GITHUB_OUTPUT"
      fi
    fi
# Provides the `uv` CLI that the next step uses for `uv export`.
- name: Install uv
  uses: astral-sh/setup-uv@v5
  with:
    enable-cache: true
# Keep requirements.txt in sync after the upstream merge.
# Committed here so the change is included in the sync PR.
# Skipped when the merge was committed with conflict markers.
- name: Update requirements.txt
  if: steps.merge_step.outputs.has_conflict != 'true'
  run: |
    uv export --format requirements-txt > requirements.txt
    # Stage first and compare the index against HEAD: a plain `git diff`
    # only looks at tracked files, so a requirements.txt that did not exist
    # before would never be detected as changed.
    git add -- requirements.txt
    if ! git diff --cached --quiet -- requirements.txt; then
      git commit -m "chore(deps): Update requirements.txt from uv.lock"
    fi
- name: Check for real changes (excluding owned paths)
  id: diffcheck
  env:
    HAS_CONFLICT: ${{ steps.merge_step.outputs.has_conflict }}
  run: |
    # Sets the step output `has_changes`, which gates the push, the PR step
    # and the snyk-scan job.

    # A conflicted merge always gets a PR so that it can be resolved there.
    if [ "${HAS_CONFLICT}" = "true" ]; then
      echo "has_changes=true" >> "$GITHUB_OUTPUT"
      echo "PR needed for conflict resolution"
      exit 0
    fi

    # Exclude pathspecs hide the midstream-owned paths from the comparison.
    OWNED_PATHSPECS=(':!.github' ':!CHANGELOG' ':!OWNERS')
    if git diff --quiet "origin/${DOWNSTREAM_BRANCH}...${HEAD_BRANCH}" -- "${OWNED_PATHSPECS[@]}"; then
      RESULT=false
      MESSAGE="No changes outside excluded paths"
    else
      RESULT=true
      MESSAGE="Changes detected"
    fi
    echo "has_changes=${RESULT}" >> "$GITHUB_OUTPUT"
    echo "${MESSAGE}"
- name: Push sync branch
  if: steps.diffcheck.outputs.has_changes == 'true'
  run: |
    # The sync branch is rebuilt from downstream on every run, so the remote
    # copy has to be overwritten rather than fast-forwarded.
    git push --force-with-lease origin "${HEAD_BRANCH}"
- name: Create or update PR
  if: steps.diffcheck.outputs.has_changes == 'true'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    # Opens the sync PR (HEAD_BRANCH -> DOWNSTREAM_BRANCH), or refreshes the
    # title and body of the one that is already open. Title and body depend
    # on whether the merge was committed with conflict markers.
    if [ "${{ steps.merge_step.outputs.has_conflict }}" = "true" ]; then
      TITLE="⚠️ chore: Merge upstream/${UPSTREAM_BRANCH} (HAS CONFLICTS)"
      BODY="Upstream merge has conflicts that need manual resolution. Excluded path conflicts (.github, CHANGELOG, OWNERS) have been auto-resolved to midstream version. Please resolve remaining conflicts in the Files Changed tab or locally."
    else
      TITLE="chore: Merge upstream/${UPSTREAM_BRANCH} into ${DOWNSTREAM_BRANCH}"
      BODY="Successfully merged upstream changes. Excluded paths (.github, CHANGELOG, OWNERS) preserved from midstream."
    fi

    # Look the PR up by head and base only. Filtering on the label as well
    # would miss a PR that was created through the unlabeled fallback below;
    # the next run would then try to create a second PR for the same branch
    # and fail.
    PR_NUMBER=$(gh pr list \
      --repo "${GITHUB_REPOSITORY}" \
      --head "${HEAD_BRANCH}" \
      --base "${DOWNSTREAM_BRANCH}" \
      --state open \
      --json number \
      --jq '.[0].number // empty')

    if [ -n "$PR_NUMBER" ]; then
      echo "Updating PR #${PR_NUMBER}"
      gh pr edit "$PR_NUMBER" \
        --repo "${GITHUB_REPOSITORY}" \
        --title "$TITLE" \
        --body "$BODY"
    else
      echo "Creating PR"
      # First attempt attaches the label; if that fails (for example because
      # the label does not exist in the repository), retry without it.
      gh pr create \
        --repo "${GITHUB_REPOSITORY}" \
        --base "${DOWNSTREAM_BRANCH}" \
        --head "${HEAD_BRANCH}" \
        --title "$TITLE" \
        --body "$BODY" \
        --label "${LABEL}" || \
      gh pr create \
        --repo "${GITHUB_REPOSITORY}" \
        --base "${DOWNSTREAM_BRANCH}" \
        --head "${HEAD_BRANCH}" \
        --title "$TITLE" \
        --body "$BODY"
    fi
# Informational Snyk scan of the sync branch. Results are reported to the
# Security tab; with fail_on_cves set to false the scan does not block the PR.
# Runs only when the sync job reported changes.
snyk-scan:
  needs: sync
  if: needs.sync.outputs.has_changes == 'true'
  uses: ./.github/workflows/snyk-security-scan.yaml
  secrets:
    SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
  with:
    # Scan the branch the sync job produced (its `head_branch` output).
    ref: ${{ needs.sync.outputs.head_branch }}
    sarif_category: midstream-snyk-audit
    fail_on_cves: false