Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 0 additions & 132 deletions .github/workflows/docker-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,138 +16,6 @@ permissions:
packages: write

jobs:
# Job: build, verify, and publish the Marin cluster Docker image.
marin-cluster-images:
# Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: 'actions/checkout@v5'

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4

# Export two step outputs: DATE_TAG (UTC date as YYYYMMDD) and HASH_TAG
# (abbreviated hash of the checked-out HEAD commit).
- name: Set tags
id: set-tags
run: |
echo "DATE_TAG=`date -u +"%Y%m%d"`" >> "$GITHUB_OUTPUT"
echo "HASH_TAG=`git rev-parse --short HEAD`" >> "$GITHUB_OUTPUT"

# Authenticate with the service-account key stored in the repository secrets and
# request an access token (token_format: access_token); the login steps read it
# from steps.auth.outputs.access_token.
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v2
with:
token_format: access_token
credentials_json: ${{ secrets.GCP_ARTIFACT_REPOSITORY_SA_KEY }}
service_account: github-artifact-repository@hai-gcp-models.iam.gserviceaccount.com

# One docker login per regional Artifact Registry host. Every login uses the
# fixed username "oauth2accesstoken" and the access token from the auth step.
- name: Login to Google Artifact Registry in europe-west4
uses: docker/login-action@v4
with:
registry: europe-west4-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

- name: Login to Google Artifact Registry in us-central1
uses: docker/login-action@v4
with:
registry: us-central1-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

- name: Login to Google Artifact Registry in us-central2
uses: docker/login-action@v4
with:
registry: us-central2-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

- name: Login to Google Artifact Registry in us-east1
uses: docker/login-action@v4
with:
registry: us-east1-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

- name: Login to Google Artifact Registry in us-east5
uses: docker/login-action@v4
with:
registry: us-east5-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

- name: Login to Google Artifact Registry in us-west4
uses: docker/login-action@v4
with:
registry: us-west4-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}

# Build docker/marin/Dockerfile.cluster with the repository root as context and
# tag the result marin_cluster:local; the verify and push steps refer to the
# image by that local tag.
- name: Build Marin cluster image
run: |
docker buildx build --file docker/marin/Dockerfile.cluster \
--provenance=false \
--load \
--tag marin_cluster:local \
.

# Smoke test: run python inside the freshly built image and import numpy,
# pandas, ray, and torch, printing each version.
- name: Verify Marin cluster image
# TODO (rav): should we just run tests inside the container?
run: |
echo "Verifying critical imports work correctly..."
docker run --rm marin_cluster:local python -c "
import sys
print(f'Python: {sys.version}')

import numpy
print(f'numpy: {numpy.__version__}')

import pandas
print(f'pandas: {pandas.__version__}')

import ray
print(f'ray: {ray.__version__}')

import torch
print(f'torch: {torch.__version__}')

print('All critical imports verified successfully!')
"

# Re-tag marin_cluster:local for every (region, tag) pair and push each one.
# Tags are "latest" plus the DATE_TAG and HASH_TAG outputs of the set-tags step;
# regions match the six registries logged in to earlier in this job.
- name: Push Marin cluster image
run: |
REGISTRIES="europe-west4 us-central1 us-central2 us-east1 us-east5 us-west4"
TAGS="latest ${{ steps.set-tags.outputs.DATE_TAG }} ${{ steps.set-tags.outputs.HASH_TAG }}"

for registry in $REGISTRIES; do
for tag in $TAGS; do
docker tag marin_cluster:local ${registry}-docker.pkg.dev/hai-gcp-models/marin/marin_cluster:${tag}
docker push ${registry}-docker.pkg.dev/hai-gcp-models/marin/marin_cluster:${tag}
done
done

# uv is needed by the "Generate cluster configs" step, which invokes `uv run`.
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.7.20"
enable-cache: true

# Replace the value of the `LATEST = "<lowercase alphanumerics>"` assignment in
# config.py with the HASH_TAG output (the short commit hash that was just pushed).
- name: Update Docker image tag in cluster config script
run: sed -i -E 's/LATEST = "[a-z0-9]+"/LATEST = "${{ steps.set-tags.outputs.HASH_TAG }}"/' lib/marin/src/marin/cluster/config.py

# NOTE(review): presumably regenerates the cluster config files from the updated
# config.py -- confirm against scripts/ray/cluster.py.
- name: Generate cluster configs
run: uv run scripts/ray/cluster.py update-configs

# Propose the working-tree changes made by the two steps above as a pull request
# from the actions/update-docker-image-tag branch.
- name: Create pull request
uses: peter-evans/create-pull-request@v6
with:
commit-message: Update Docker image tag
title: Update Docker image tag
branch: actions/update-docker-image-tag
body: Auto-generated from GitHub Actions.

# Iris Images - Worker, controller, and task images pushed to GHCR
# Uses a single multi-stage Dockerfile (lib/iris/Dockerfile) with --target.
# Controller and worker share all layers up to Python deps; the worker adds
Expand Down
45 changes: 0 additions & 45 deletions .github/workflows/marin-cleanup-tpus.yaml

This file was deleted.

85 changes: 2 additions & 83 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: help clean check fix cluster_docker cluster_docker_build cluster_docker_push setup_pre_commit rust-dev rust-user rust-status rust-package
.PHONY: help clean check fix setup_pre_commit rust-dev rust-user rust-status rust-package
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Update Ray build docs when removing cluster_docker targets

Removing the cluster_docker*/cluster_tag Make targets breaks the documented operator flow immediately, because docs/dev-guide/rebuilding-cluster.md (lines 60-70) and infra/README.md (lines 224-247) still instruct users to run those targets and scripts/ray/cluster.py, which this commit also deletes. In practice this now yields make: *** No rule to make target ... and dead commands in runbooks, so the docs are no longer executable after this change; this also violates the root AGENTS.md rule to keep MkDocs docs in sync with code.

Useful? React with 👍 / 👎.

.DEFAULT: help


Expand Down Expand Up @@ -51,89 +51,8 @@ test:
export HF_HUB_TOKEN=$HF_TOKEN
RAY_ADDRESS= PYTHONPATH=tests:. pytest tests --durations=0 -n 4 --tb=no -v

# Define regions and tags for the Docker images
CLUSTER_REPOS = us-central2 us-central1 europe-west4 us-west4 us-east5 us-east1

# Simply-expanded (:=) on purpose: `date` and `git rev-parse` run exactly once,
# when the Makefile is parsed. With recursive (=) assignment they were re-run on
# every expansion, so a build/tag/push sequence that crossed midnight UTC could
# tag the image with one date and then try to push (or record) another.
# Both values can still be overridden on the command line (make TAG_DATE=...).
TAG_DATE := $(shell date -u +"%Y%m%d")
TAG_VERSIONS := latest $(shell git rev-parse --short HEAD) $(TAG_DATE)

# If VLLM is defined, use different Dockerfile and image name
ifdef VLLM
DOCKERFILE = docker/marin/Dockerfile.vllm
DOCKER_IMAGE_NAME = marin_vllm
else
DOCKERFILE = docker/marin/Dockerfile.cluster
DOCKER_IMAGE_NAME = marin_cluster
endif

# Target to build the Docker image and tag it appropriately.
#
# Builds $(DOCKERFILE) for linux/amd64 as '$(DOCKER_IMAGE_NAME):latest', then
# tags that image once per (region, version) pair from CLUSTER_REPOS x TAG_VERSIONS.
#
# The $(foreach ...) expands to a single shell line of `docker tag ...;` commands.
# `set -e` makes that shell stop at the first failing tag; without it only the
# exit status of the last command reached make, so earlier failures were ignored.
#
# The closing message mentions config.py, but this target does not touch it:
# that edit is done by cluster_tag, a prerequisite of cluster_docker_push.
cluster_docker_build:
	@echo "Building Docker image using Dockerfile: $(DOCKERFILE)"
	docker buildx build --platform linux/amd64 --output "type=docker,compression=zstd" -t '$(DOCKER_IMAGE_NAME):latest' -f $(DOCKERFILE) .
	@echo "Tagging Docker image for each region and version..."
	set -e; $(foreach region,$(CLUSTER_REPOS), \
		$(foreach version,$(TAG_VERSIONS), \
			docker tag '$(DOCKER_IMAGE_NAME):latest' '$(region)-docker.pkg.dev/hai-gcp-models/marin/$(DOCKER_IMAGE_NAME):$(version)';))
	@echo "Docker image build and tagging complete, updating config.py with latest version..."

# Rewrite the `LATEST = "..."` assignment in the cluster config module so that it
# holds $(TAG_DATE). The edit is done in place with sed; on Darwin the -i option
# is given an explicit empty backup suffix (''), everywhere else it is used bare.
cluster_tag:
	@case "$$(uname)" in \
		Darwin) \
			sed -i '' -e "s/LATEST = \".*\"/LATEST = \"$(TAG_DATE)\"/" lib/marin/src/marin/cluster/config.py ;; \
		*) \
			sed -i -e "s/LATEST = \".*\"/LATEST = \"$(TAG_DATE)\"/" lib/marin/src/marin/cluster/config.py ;; \
	esac

# Target to push the tagged Docker images to their respective Artifact Registries.
#
# Runs cluster_tag first, then for every region in CLUSTER_REPOS:
#   1. configures docker credentials for <region>-docker.pkg.dev,
#   2. lists the "marin" repository and, if the list command fails, creates it,
#   3. pushes every tag in TAG_VERSIONS.
#
# Each $(foreach ...) expands to one shell line of `cmd; cmd; ...`. `set -e`
# makes the shell abort on the first failure; previously a failed auth, create,
# or push was ignored unless it happened to be the last command on the line.
#
# NOTE(review): the create fallback only fires when `repositories list` exits
# non-zero; confirm that it does so when the filter matches no repository.
cluster_docker_push: cluster_tag
	@echo "Authenticating and preparing repositories..."
	set -e; $(foreach region,$(CLUSTER_REPOS), \
		gcloud auth configure-docker $(region)-docker.pkg.dev;)
	set -e; $(foreach region,$(CLUSTER_REPOS), \
		gcloud artifacts repositories list --location=$(region) --filter 'name:marin' > /dev/null || \
		gcloud artifacts repositories create --repository-format=docker --location=$(region) marin;)
	@echo "Pushing Docker images for each region and version..."
	set -e; $(foreach region,$(CLUSTER_REPOS), \
		$(foreach version,$(TAG_VERSIONS), \
			docker push '$(region)-docker.pkg.dev/hai-gcp-models/marin/$(DOCKER_IMAGE_NAME):$(version)';))

# Build the image, then tag and push it to GitHub Container Registry under
# ghcr.io/stanford-crfm/marin/ for every tag in TAG_VERSIONS.
#
# `set -e` stops each expanded `cmd; cmd; ...` line at the first failing
# docker command instead of reporting only the status of the last one.
cluster_docker_ghcr_push: cluster_docker_build
	@echo "Pushing Docker image to GitHub Container Registry..."
	set -e; $(foreach version,$(TAG_VERSIONS), \
		docker tag '$(DOCKER_IMAGE_NAME):latest' 'ghcr.io/stanford-crfm/marin/$(DOCKER_IMAGE_NAME):$(version)';)

	set -e; $(foreach version,$(TAG_VERSIONS), \
		docker push 'ghcr.io/stanford-crfm/marin/$(DOCKER_IMAGE_NAME):$(version)';)


# Convenience entry point: build the cluster image, then push it.
#
# NOTE(review): cluster_docker_push does not itself list cluster_docker_build as
# a prerequisite, so "build before push" comes only from the left-to-right order
# of the prerequisites below, which make honours in a serial run but not under -j.
cluster_docker: \
    cluster_docker_build \
    cluster_docker_push
	@echo "Docker image build and push complete."

# Staging/test image flow (does not update lib/marin/src/marin/cluster/config.py)
# Used by infra/marin-us-central2-staging.yaml, which points at marin_cluster_test:latest.
#
# User-tunable knobs (override on the command line or via the environment):
#   TEST_CLUSTER_REPO       Artifact Registry region that receives the staging image
#   TEST_DOCKER_IMAGE_NAME  image name used both locally and in the registry
TEST_CLUSTER_REPO ?= us-central2
TEST_DOCKER_IMAGE_NAME ?= marin_cluster_test

# Derived names. Recursive (=) so that they always follow the two knobs above.
test_registry_host = $(TEST_CLUSTER_REPO)-docker.pkg.dev
test_local_image = $(TEST_DOCKER_IMAGE_NAME):latest
test_remote_image = $(test_registry_host)/hai-gcp-models/marin/$(test_local_image)

.PHONY: cluster_docker_test_build cluster_docker_test_push cluster_docker_test

# Build the staging image from the regular cluster Dockerfile and tag it for the
# staging registry.
cluster_docker_test_build:
	@echo "Building staging Docker image using Dockerfile: docker/marin/Dockerfile.cluster"
	docker buildx build --platform linux/amd64 --output "type=docker,compression=zstd" -t '$(test_local_image)' -f docker/marin/Dockerfile.cluster .
	@echo "Tagging staging Docker image for $(TEST_CLUSTER_REPO)..."
	docker tag '$(test_local_image)' '$(test_remote_image)'

# Configure docker credentials for the staging registry, create the "marin"
# repository if the list command fails, then push the :latest staging image.
cluster_docker_test_push: cluster_docker_test_build
	@echo "Authenticating and preparing repository in $(TEST_CLUSTER_REPO)..."
	gcloud auth configure-docker $(test_registry_host)
	gcloud artifacts repositories list --location=$(TEST_CLUSTER_REPO) --filter 'name:marin' > /dev/null || \
		gcloud artifacts repositories create --repository-format=docker --location=$(TEST_CLUSTER_REPO) marin
	@echo "Pushing staging Docker image..."
	docker push '$(test_remote_image)'

# Full staging flow: build (via the push target's prerequisite), then push.
cluster_docker_test: cluster_docker_test_push
	@echo "Staging Docker image build and push complete."


# Target to configure GCP registry cleanup policy for all standard regions
CLUSTER_REPOS = us-central2 us-central1 europe-west4 us-west4 us-east5 us-east1
default_registry_name = marin
configure_gcp_registry_all:
@echo "Configuring GCP registry cleanup policy for all standard regions..."
Expand Down
104 changes: 0 additions & 104 deletions docker/marin/Dockerfile.cluster

This file was deleted.

Loading
Loading