erfanzar · dvruette · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025 · Aug 2, 2025
diff --git a/.github/workflows/docker-image_ray.yaml b/.github/workflows/docker-image_ray.yaml
@@ -0,0 +1,83 @@
+# Builds the EasyDeL Ray images (CPU, GPU and TPU variants of Dockerfile.ray)
+# and pushes them to GHCR on every event except pull requests.
+name: Docker Image CI/CD
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          # `args` is spliced into `build-args` below, which is a
+          # newline-delimited list: each KEY=VALUE pair needs its own line,
+          # hence the "\n" escapes (a space would fold both into one value).
+          - file: Dockerfile.ray
+            tag: ray-cpu
+            tagsuf: "-ray-cpu"
+            args: "HARDWARE_TYPE=cpu\nRAY_IMAGE=rayproject/ray:2.47.1-py311"
+          - file: Dockerfile.ray
+            tag: ray-gpu
+            tagsuf: "-ray-gpu"
+            args: "HARDWARE_TYPE=gpu\nRAY_IMAGE=rayproject/ray:2.47.1-py311-gpu"
+          - file: Dockerfile.ray
+            tag: ray-tpu
+            tagsuf: "-ray-tpu"
+            args: "HARDWARE_TYPE=tpu\nRAY_IMAGE=rayproject/ray:2.47.1-py311"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Extract version
+        id: meta
+        run: |
+          # Take the first `version = "..."` line so the output stays single-line.
+          VERSION=$(grep -m1 '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
+          # An empty version would produce an invalid tag such as ":-ray-cpu".
+          if [ -z "$VERSION" ]; then
+            echo "::error::could not read version from pyproject.toml"
+            exit 1
+          fi
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "date=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
+      - name: Log in to GHCR
+        # No login on pull requests: nothing is pushed for them.
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./${{ matrix.target.file }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: |
+            ghcr.io/dvruette/easydel:${{ steps.meta.outputs.version }}${{ matrix.target.tagsuf }}
+            ghcr.io/dvruette/easydel:latest${{ matrix.target.tagsuf }}
+            ghcr.io/dvruette/easydel:${{ steps.meta.outputs.date }}${{ matrix.target.tagsuf }}
+          build-args: |
+            VERSION=${{ steps.meta.outputs.version }}
+            ${{ matrix.target.args }}
+          # One cache ref per matrix target so parallel jobs do not overwrite
+          # each other's cache.
+          cache-from: type=registry,ref=ghcr.io/dvruette/easydel:buildcache${{ matrix.target.tagsuf }}
+          # Pull requests skip the GHCR login, so only export the cache when
+          # the job is authenticated.
+          cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref=ghcr.io/dvruette/easydel:buildcache{0},mode=max', matrix.target.tagsuf) || '' }}
+      - name: Prune build cache
+        if: always()
+        run: |
+          docker buildx prune -af || true
+          docker system prune -af || true
diff --git a/.github/workflows/docker-image_tpu.yaml b/.github/workflows/docker-image_tpu.yaml
@@ -5,47 +5,38 @@ on:
     branches: [main]
   pull_request:
     branches: [main]
-  workflow_dispatch: # Allow manual triggers
+  workflow_dispatch:
 
 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     permissions:
       contents: read
       packages: write
-
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          - {file: Dockerfile, tag: cpu, tagsuf: "", args: "HARDWARE_TYPE=cpu"}
+          - {file: Dockerfile, tag: gpu, tagsuf: "-gpu", args: "HARDWARE_TYPE=gpu"}
+          - {file: Dockerfile, tag: tpu, tagsuf: "-tpu", args: "HARDWARE_TYPE=tpu"}
+          - {file: Dockerfile.ray, tag: ray-cpu, tagsuf: "-ray-cpu", args: "HARDWARE_TYPE=cpu RAY_IMAGE=rayproject/ray:2.36.0-py311"}
+          - {file: Dockerfile.ray, tag: ray-gpu, tagsuf: "-ray-gpu", args: "HARDWARE_TYPE=gpu RAY_IMAGE=rayproject/ray:2.36.0-py311-gpu"}
+          - {file: Dockerfile.ray, tag: ray-tpu, tagsuf: "-ray-tpu", args: "HARDWARE_TYPE=tpu RAY_IMAGE=rayproject/ray:2.36.0-py311"}
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: '3.11'
-
-      - name: Install toml package
-        run: pip install toml
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract version from pyproject.toml
+      - run: pip install toml
+      - name: Extract version
         id: meta
         run: |
           VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
           echo "version=$VERSION" >> $GITHUB_OUTPUT
           echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
-
-      - name: Log in to GitHub Container Registry
-        if: github.event_name != 'pull_request' # Avoid pushing on PRs
+      - name: Log in to GHCR
+        if: github.event_name != 'pull_request'
         uses: docker/login-action@v3
         with:
           registry: ghcr.io

diff --git a/Dockerfile b/Dockerfile
@@ -105,4 +105,4 @@ ENV PYTHONPATH=/app:/app/easydel:. \
 USER easydel
 WORKDIR /app
 
-CMD ["bash"]
+CMD ["bash"]
diff --git a/Dockerfile.ray b/Dockerfile.ray
@@ -0,0 +1,70 @@
+# syntax=docker/dockerfile:1
+# EasyDeL on top of a Ray base image. Build args:
+#   RAY_IMAGE      base image to build FROM
+#   HARDWARE_TYPE  cpu (default), gpu or tpu; gpu/tpu add the matching uv extra
+#   VERSION        exported as an env var and as the OCI version label
+ARG RAY_IMAGE=rayproject/ray:2.49.1-py311
+FROM ${RAY_IMAGE}
+
+WORKDIR /home/ray
+
+RUN sudo apt-get update && sudo apt-get install -y --no-install-recommends libgomp1 rsync ca-certificates curl git build-essential && sudo rm -rf /var/lib/apt/lists/*
+
+RUN python3.11 -m pip install uv
+
+# Copy only the dependency manifests first so the dependency layer below can
+# be reused when only the sources change.
+COPY pyproject.toml uv.lock* ./
+
+ARG HARDWARE_TYPE=cpu
+# NOTE(review): the cache mount targets /root/.cache/uv, but the use of sudo
+# above suggests the build user is not root, in which case uv would not use
+# this path. Confirm the intended cache location.
+RUN --mount=type=cache,target=/root/.cache/uv uv venv /home/ray/.venv && \
+    if [ "$HARDWARE_TYPE" = "gpu" ]; then uv sync --frozen --no-dev --no-install-project --extra gpu; \
+    elif [ "$HARDWARE_TYPE" = "tpu" ]; then uv sync --frozen --no-dev --no-install-project --extra tpu; \
+    else uv sync --frozen --no-dev --no-install-project; fi
+
+ENV PATH="/home/ray/.venv/bin:$PATH" \
+    VIRTUAL_ENV="/home/ray/.venv" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Remove the conda initialisation block and the anaconda PATH export from
+# .bashrc, then append the venv settings so interactive shells use it too.
+RUN sed -i \
+    -e '/^# >>> conda initialize >>>/,/^# <<< conda initialize <<</d' \
+    -e '/^export \+PATH=\$HOME\/anaconda3\/bin:\$PATH$/d' \
+    /home/ray/.bashrc && \
+    printf 'export VIRTUAL_ENV=/home/ray/.venv\nexport PATH="/home/ray/.venv/bin:$PATH"\n' >> /home/ray/.bashrc
+
+COPY . .
+
+RUN uv pip install -e . --no-deps
+RUN uv pip install \
+    google-api-python-client \
+    google-auth-httplib2 \
+    google-auth-oauthlib \
+    cryptography
+
+# --fail makes an HTTP error abort the build here instead of saving the error
+# page to disk; canned answers are piped to the installer via printf.
+RUN curl -fsSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz && \
+    tar -xf google-cloud-cli-linux-x86_64.tar.gz && \
+    printf "N\nY\n\n" | script -qec ./google-cloud-sdk/install.sh && \
+    rm google-cloud-cli-linux-x86_64.tar.gz
+
+COPY ./preemptible-fix.patch .
+# apply the patch to fix preemptible GCP in Ray in the installed ray package
+RUN patch -d /home/ray/.venv/lib/python3.11/site-packages/ -p2 < preemptible-fix.patch
+
+ARG VERSION
+ENV VERSION=$VERSION \
+    HARDWARE_TYPE=$HARDWARE_TYPE
+
+LABEL org.opencontainers.image.version=$VERSION \
+      org.opencontainers.image.description="EasyDeL on Ray base image" \
+      org.opencontainers.image.source="https://github.com/dvruette/EasyDeL"
+
+ENTRYPOINT []
+CMD ["bash"]
diff --git a/easydel/__init__.py b/easydel/__init__.py
@@ -33,23 +33,23 @@
 if _check_bool_flag("EASYDEL_AUTO", True):
     _sys.setrecursionlimit(10000)
 
-    # Tell jax xla bridge to stay quiet and only yied warnings or errors.
-    _getlogger("jax._src.xla_bridge").setLevel(30)
-    _getlogger("jax._src.mesh_utils").setLevel(30)
-    _getlogger("jax._src.distributed").setLevel(30)
-    # these people talk too much
-    _getlogger("eray-executor").setLevel(30)
-    _getlogger("absl").setLevel(30)
-    _getlogger("datasets").setLevel(30)
+    # # Tell jax xla bridge to stay quiet and only yield warnings or errors.
+    # _getlogger("jax._src.xla_bridge").setLevel(30)
+    # _getlogger("jax._src.mesh_utils").setLevel(30)
+    # _getlogger("jax._src.distributed").setLevel(30)
+    # # these people talk too much
+    # _getlogger("eray-executor").setLevel(30)
+    # _getlogger("absl").setLevel(30)
+    # # _getlogger("datasets").setLevel(30)
 
-    _os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-    _os.environ["KMP_AFFINITY"] = "noverbose"
-    _os.environ["GRPC_VERBOSITY"] = "3"
-    _os.environ["GLOG_minloglevel"] = "3"
-    _os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
-    _os.environ["CACHE_TRITON_KERNELS"] = "1"
-    _os.environ["TPU_MIN_LOG_LEVEL"] = "2"
-    _os.environ["TPU_STDERR_LOG_LEVEL"] = "2"
+    # _os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+    # _os.environ["KMP_AFFINITY"] = "noverbose"
+    # _os.environ["GRPC_VERBOSITY"] = "3"
+    # _os.environ["GLOG_minloglevel"] = "3"
+    # _os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
+    # _os.environ["CACHE_TRITON_KERNELS"] = "1"
+    # _os.environ["TPU_MIN_LOG_LEVEL"] = "2"
+    # _os.environ["TPU_STDERR_LOG_LEVEL"] = "2"
     _os.environ["XLA_FLAGS"] = (
         _os.getenv("XLA_FLAGS", "") + " "
         "--xla_gpu_triton_gemm_any=true  "
@@ -70,7 +70,7 @@
         "--xla_gpu_force_compilation_parallelism=4 "
         "--xla_gpu_enable_shared_constants=true "
         "--xla_gpu_enable_triton_gemm=true "
-        "--xla_gpu_graph_level=3 "
+        # "--xla_gpu_graph_level=3 "
         "--xla_gpu_enable_command_buffer=  "
     )
     _os.environ["LIBTPU_INIT_ARGS"] = (

diff --git a/easydel/infra/base_state.py b/easydel/infra/base_state.py
@@ -484,6 +484,8 @@ def save_optimizer(self, save_directory: str | ePathLike, float_dtype: jnp.dtype
                     )
             except Exception as e:
                 logger.error(f"Optimizer save failed: {e!s}")
+                import traceback
+                traceback.print_exc()
                 raise
         else:
             logger.info("Current State don't contain any Optimizer.")

diff --git a/easydel/infra/modeling_outputs.py b/easydel/infra/modeling_outputs.py
@@ -191,6 +191,7 @@ def __reduce__(self):
 class AttentionLayerOutput(ModelOutput):
     attention_output: chex.Array
     attention_weight: chex.Array | None = None
+    attention_logits: chex.Array | None = None
     cache_view: TransformerCacheView | None = None
 
 
@@ -199,6 +200,7 @@ class EncoderLayerOutput(ModelOutput):
     hidden_states: chex.Array
     residual_states: chex.Array | None = None
     attention_weight: chex.Array | None = None
+    attention_logits: chex.Array | None = None
 
 
 @auto_pytree
@@ -207,6 +209,7 @@ class DecoderLayerOutput(ModelOutput):
     residual_states: chex.Array | None = None
     cross_attention: chex.Array | None = None
     attention_weight: chex.Array | None = None
+    attention_logits: chex.Array | None = None
     router_logits: chex.Array | None = None
     gate_loss: chex.Array | None = None
     cache_view: TransformerCacheView | None = None
@@ -236,6 +239,7 @@ class BaseModelOutput(ModelOutput):
     last_hidden_state: chex.Array = None
     hidden_states: tuple[chex.Array] | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     past_key_values: dict[str, chex.Array] | None = None
     loss: chex.Array | None = None
 
@@ -329,6 +333,7 @@ class BaseModelOutputWithPast(ModelOutput):
     past_key_values: dict[str, chex.Array] | None = None
     hidden_states: tuple[chex.Array] | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -361,6 +366,7 @@ class BaseModelOutputWithPooling(ModelOutput):
     pooler_output: chex.Array = None
     hidden_states: tuple[chex.Array] | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -410,7 +416,9 @@ class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
     hidden_states: tuple[chex.Array] | None = None
     past_key_values: TransformerCache | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     cross_attentions: tuple[chex.Array] | None = None
+    cross_attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -457,7 +465,9 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
     past_key_values: TransformerCache | None = None
     hidden_states: tuple[chex.Array] | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     cross_attentions: tuple[chex.Array] | None = None
+    cross_attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -516,10 +526,13 @@ class Seq2SeqModelOutput(ModelOutput):
     past_key_values: TransformerCache | None = None
     decoder_hidden_states: tuple[chex.Array] | None = None
     decoder_attentions: tuple[chex.Array] | None = None
+    decoder_attention_logits: tuple[chex.Array] | None = None
     cross_attentions: tuple[chex.Array] | None = None
+    cross_attention_logits: tuple[chex.Array] | None = None
     encoder_last_hidden_state: chex.Array | None = None
     encoder_hidden_states: tuple[chex.Array] | None = None
     encoder_attentions: tuple[chex.Array] | None = None
+    encoder_attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -561,7 +574,9 @@ class CausalLMOutputWithCrossAttentions(ModelOutput):
     past_key_values: TransformerCache | None = None
     hidden_states: tuple[chex.Array] | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     cross_attentions: tuple[chex.Array] | None = None
+    cross_attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -590,6 +605,7 @@ class MaskedLMOutput(ModelOutput):
     hidden_states: tuple[chex.Array] | None = None
     last_hidden_state: chex.Array | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     past_key_values: TransformerCache | None = None
     loss: chex.Array | None = None
 
@@ -648,10 +664,13 @@ class Seq2SeqLMOutput(ModelOutput):
     past_key_values: TransformerCache | None = None
     decoder_hidden_states: tuple[chex.Array] | None = None
     decoder_attentions: tuple[chex.Array] | None = None
+    decoder_attention_logits: tuple[chex.Array] | None = None
     cross_attentions: tuple[chex.Array] | None = None
+    cross_attention_logits: tuple[chex.Array] | None = None
     encoder_last_hidden_state: chex.Array | None = None
     encoder_hidden_states: tuple[chex.Array] | None = None
     encoder_attentions: tuple[chex.Array] | None = None
+    encoder_attention_logits: tuple[chex.Array] | None = None
     loss: chex.Array | None = None
 
 
@@ -946,6 +965,7 @@ class MoeModelOutput(ModelOutput):
     hidden_states: tuple[chex.Array] | None = None
     past_key_values: TransformerCache | None = None
     attentions: tuple[chex.Array] | None = None
+    attention_logits: tuple[chex.Array] | None = None
     router_logits: tuple[chex.Array] | None = None
     all_router_losses: tuple[chex.Array] | None = None
     logits: chex.Array = None