Update benchmarks.yml: disable the JAX job, drop debug output and the duplicate connection step

juliagmt-google · web-flow · commit d67626c7985c · 2024-12-17T12:17:12.000-08:00
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -16,34 +16,6 @@ on:
         - 'no'
 
 jobs:
-  jax-build-and-test:
-    runs-on: linux-x86-g2-48-l4-4gpu # Use a GPU-enabled runner
-    container:
-      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
-
-    env:
-      JAXCI_HERMETIC_PYTHON_VERSION: 3.11
-
-    steps:
-      - name: Checkout JAX Fork
-        uses: actions/checkout@v3
-        with:
-          repository: 'google-ml-infra/jax-fork'
-          path: jax-fork
-
-      - name: Install JAX Dependencies
-        working-directory: jax-fork
-        run: |
-          python -m pip install --upgrade pip
-          pip install pytest
-          pip install absl-py
-          pip install "jax[cuda12_pip]"  # Adjust CUDA version if needed
-          pip install google-benchmark
-      - name: Run JAX Multiprocess GPU Test
-        working-directory: jax-fork
-        continue-on-error: true
-        run: python -m pytest tests/multiprocess_gpu_test.py
-
   build-xla-gpu-and-test:
     runs-on: linux-x86-g2-48-l4-4gpu # Use a GPU-enabled runner
     container:
@@ -113,29 +85,8 @@ jobs:
       - name: Run specific HLO file
         working-directory: xla
         run: |
-          echo "Before hlo_runner_main"
-          pwd  # Print the current working directory
-          ls -l  # list files in the directory
           ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning gemm_00881937d6d49056045c3325a12b108b.hlo &> results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
-          echo "After hlo_runner_main"
-          ls -l results  # List files in the results directory
-          cat results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
 
-      
-      # - name: Run HLO Module Benchmarks with GPU in xla/tests/fuzz
-      #   working-directory: xla
-      #   continue-on-error: true
-      #   run: |
-      #     for file in xla/tests/fuzz/*.hlo; do
-      #       filename=$(basename "$file")
-      #       # Skip expected failed hlo files.
-      #       if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
-      #         echo "Skipping benchmark on $file"
-      #         continue
-      #       fi
-      #       echo "Running benchmark on $file" 
-      #       ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
-      #     done
       - name: Wait For Connection
         uses: google-ml-infra/actions/ci_connection@main
         with:
@@ -148,20 +99,39 @@ jobs:
         working-directory: xla
         run: python parse_xla_logs.py results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
 
-      - name: Wait For Connection
-        uses: google-ml-infra/actions/ci_connection@main
-        with:
-          halt-dispatch-input: ${{ inputs.halt-for-connection }}
-
       - name: Upload Results
         uses: actions/upload-artifact@v4
         with:
           name: gpu-xla-benchmarks
           path: xla/results
-      # - name: Wait For Connection
-      #   uses: google-ml-infra/actions/ci_connection@main
-      #   with:
-      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}
+  # jax-build-and-test:
+  #   runs-on: linux-x86-g2-48-l4-4gpu # Use a GPU-enabled runner
+  #   container:
+  #     image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
+
+  #   env:
+  #     JAXCI_HERMETIC_PYTHON_VERSION: 3.11
+
+  #   steps:
+  #     - name: Checkout JAX Fork
+  #       uses: actions/checkout@v3
+  #       with:
+  #         repository: 'google-ml-infra/jax-fork'
+  #         path: jax-fork
+
+  #     - name: Install JAX Dependencies
+  #       working-directory: jax-fork
+  #       run: |
+  #         python -m pip install --upgrade pip
+  #         pip install pytest
+  #         pip install absl-py
+  #         pip install "jax[cuda12_pip]"  # Adjust CUDA version if needed
+  #         pip install google-benchmark
+  #     - name: Run JAX Multiprocess GPU Test
+  #       working-directory: jax-fork
+  #       continue-on-error: true
+  #       run: python -m pytest tests/multiprocess_gpu_test.py
+
 
      # - name: Run HLO Module Benchmarks with GPU in xla/tests/fuzz
       #   working-directory: xla