Create a workflow to run benchmarks #74
Workflow file for this run
name: Benchmarks

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: true
        default: 'no'
        options:
          - 'yes'
          - 'no'

jobs:
  jax-build-and-test:
    runs-on: linux-x86-g2-48-l4-4gpu  # Use a GPU-enabled runner
    container:
      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
    env:
      JAXCI_HERMETIC_PYTHON_VERSION: 3.11
    steps:
      - name: Checkout JAX Fork
        uses: actions/checkout@v3
        with:
          repository: 'google-ml-infra/jax-fork'
          path: jax-fork

      - name: Install JAX Dependencies
        working-directory: jax-fork
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install absl-py
          pip install "jax[cuda12_pip]"  # Adjust CUDA version if needed
          pip install google-benchmark

      - name: Run JAX Multiprocess GPU Test
        working-directory: jax-fork
        continue-on-error: true
        run: python -m pytest tests/multiprocess_gpu_test.py

  build-xla-gpu-and-test:
    runs-on: linux-x86-g2-48-l4-4gpu  # Use a GPU-enabled runner
    container:
      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
      options: --gpus all --privileged  # Might need privileged mode, use with caution
    steps:
      - name: Checkout XLA
        uses: actions/checkout@v3
        with:
          repository: openxla/xla  # Replace with your fork if needed
          path: xla

      - name: Create results directory
        working-directory: xla
        run: mkdir -p results

      - name: Get GPU spec
        working-directory: xla
        continue-on-error: true
        run: nvidia-smi

      - name: Configure XLA
        working-directory: xla
        run: ./configure.py --backend CUDA --nccl

      - name: Set TF_CPP_MAX_VLOG_LEVEL
        working-directory: xla
        run: echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV  # Use GITHUB_ENV to persist across steps

      - name: Check TF_CPP_MAX_VLOG_LEVEL
        working-directory: xla
        run: echo "$TF_CPP_MAX_VLOG_LEVEL"

      - name: Build hlo_runner_main
        working-directory: xla
        run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
      - name: Create gemm_00881937d6d49056045c3325a12b108b.hlo
        working-directory: xla
        run: |
          cat << EOF > gemm_00881937d6d49056045c3325a12b108b.hlo
          HloModule gemm_fusion_dot.542, entry_computation_layout={(bf16[1,8192,3072]{2,1,0}, s8[3072,6,512]{2,1,0})->bf16[1,8192,6,512]{3,2,1,0}}
          %gemm_fusion_dot.542_computation.clone (parameter_0.543: bf16[1,8192,3072], parameter_1.543: s8[3072,6,512]) -> bf16[1,8192,6,512] {
            %parameter_0.543 = bf16[1,8192,3072]{2,1,0} parameter(0)
            %bitcast.69925 = bf16[8192,3072]{1,0} bitcast(bf16[1,8192,3072]{2,1,0} %parameter_0.543)
            %parameter_1.543 = s8[3072,6,512]{2,1,0} parameter(1)
            %bitcast.69926 = s8[3072,3072]{1,0} bitcast(s8[3072,6,512]{2,1,0} %parameter_1.543)
            %convert.18528 = bf16[3072,3072]{1,0} convert(s8[3072,3072]{1,0} %bitcast.69926), metadata={op_name="pjit(_wrapped_fn)/jit(main)/tarzan_lm.apply/tarzan_lm.decode_with_params/lm/transformer/x_layers_0/self_attention/query/query.quantized_einsum/ABD,DNH->ABNH/convert_element_type[new_dtype=bfloat16 weak_type=False]" source_file="third_party/py/praxis/layers/quantization/operations.py" source_line=220}
            %dot.4949 = bf16[8192,3072]{1,0} dot(bf16[8192,3072]{1,0} %bitcast.69925, bf16[3072,3072]{1,0} %convert.18528), lhs_contracting_dims={1}, rhs_contracting_dims={0}, metadata={op_name="pjit(_wrapped_fn)/jit(main)/tarzan_lm.apply/tarzan_lm.decode_with_params/lm/transformer/x_layers_0/self_attention/query/query.quantized_einsum/ABD,DNH->ABNH/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=None preferred_element_type=None]" source_file="third_party/py/praxis/layers/quantization/operations.py" source_line=220}
            ROOT %bitcast.69927 = bf16[1,8192,6,512]{3,2,1,0} bitcast(bf16[8192,3072]{1,0} %dot.4949)
          }
          ENTRY %entry_computation (multiply.28104: bf16[1,8192,3072], Arg_51.52: s8[3072,6,512]) -> bf16[1,8192,6,512] {
            %multiply.28104 = bf16[1,8192,3072]{2,1,0} parameter(0)
            %Arg_51.52 = s8[3072,6,512]{2,1,0} parameter(1)
            ROOT %micro_kernel = bf16[1,8192,6,512]{3,2,1,0} fusion(bf16[1,8192,3072]{2,1,0} %multiply.28104, s8[3072,6,512]{2,1,0} %Arg_51.52), kind=kCustom, calls=%gemm_fusion_dot.542_computation.clone, metadata={op_name="pjit(_wrapped_fn)/jit(main)/tarzan_lm.apply/tarzan_lm.decode_with_params/lm/transformer/x_layers_0/self_attention/query/query.quantized_einsum/ABD,DNH->ABNH/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=None preferred_element_type=None]" source_file="third_party/py/praxis/layers/quantization/operations.py" source_line=220}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"fusion_backend_config":{"kind":"__triton_gemm"},"force_earliest_schedule":false}
          }
          EOF
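          # A rough JAX equivalent of this fused GEMM is sketched after the workflow file below.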
      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      - name: Run specific HLO file
        working-directory: xla
        run: |
          echo "Before hlo_runner_main"
          pwd   # Print the current working directory
          ls -l # List files in the directory
          ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning gemm_00881937d6d49056045c3325a12b108b.hlo &> results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
          echo "After hlo_runner_main"
          ls -l results # List files in the results directory
          cat results/gemm_00881937d6d49056045c3325a12b108b.hlo.log

      # - name: Run HLO Module Benchmarks with GPU in xla/tests/fuzz
      #   working-directory: xla
      #   continue-on-error: true
      #   run: |
      #     for file in xla/tests/fuzz/*.hlo; do
      #       filename=$(basename "$file")
      #       # Skip expected failed hlo files.
      #       if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
      #         echo "Skipping benchmark on $file"
      #         continue
      #       fi
      #       echo "Running benchmark on $file"
      #       ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
      #     done
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      - name: Download parse_xla_logs.py
        working-directory: xla
        run: wget https://raw.githubusercontent.com/juliagmt-google/xla/main/.github/workflows/parse_xla_logs.py

      - name: Parse XLA logs
        working-directory: xla
        run: python parse_xla_logs.py results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
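        # parse_xla_logs.py is fetched from a fork above; a sketch of what such a parser might look like appears after this file.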
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: gpu-xla-benchmarks
          path: xla/results
      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}
      # - name: Run HLO Module Benchmarks with GPU in xla/tests/fuzz
      #   working-directory: xla
      #   continue-on-error: true
      #   run: |
      #     for file in xla/tests/fuzz/*.hlo; do
      #       filename=$(basename "$file")
      #       # Skip expected failed hlo files.
      #       if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
      #         echo "Skipping benchmark on $file"
      #         continue
      #       fi
      #       echo "Running benchmark on $file"
      #       ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
      #     done
      # - name: Upload Results
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: gpu-xla-benchmarks
      #     path: xla/results
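For orientation, the gemm_00881937d6d49056045c3325a12b108b.hlo module above is an int8-weight, bf16-activation matmul (the ABD,DNH->ABNH einsum named in the op metadata) fused into a single Triton GEMM. A rough JAX equivalent of the math, ignoring the fusion/backend details and any quantization scaling handled elsewhere in the original model, might look like this; shapes are taken from the HLO, and the function name is only for illustration:

```python
import jax
import jax.numpy as jnp

def quantized_einsum(x, w):
    """Rough equivalent of the fused GEMM in the HLO above.

    x: bf16 activations of shape (1, 8192, 3072)
    w: int8 weights of shape (3072, 6, 512)
    Returns a bf16 array of shape (1, 8192, 6, 512).
    """
    w_bf16 = w.astype(jnp.bfloat16)                 # convert.18528: s8 -> bf16
    return jnp.einsum("abd,dnh->abnh", x, w_bf16)   # dot.4949 plus the surrounding bitcasts

x = jnp.zeros((1, 8192, 3072), dtype=jnp.bfloat16)
w = jnp.zeros((3072, 6, 512), dtype=jnp.int8)
print(jax.jit(quantized_einsum)(x, w).shape)  # (1, 8192, 6, 512)
```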
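The "Parse XLA logs" step downloads parse_xla_logs.py from the juliagmt-google/xla fork, so the script itself is not part of this PR. As a rough idea only, a minimal parser in the same spirit might just pull timing-related lines out of the hlo_runner_main log; the patterns below are assumptions about the log format, not the behaviour of the real script:

```python
import re
import sys

def parse_log(path):
    """Print lines from an hlo_runner_main log that look like timing or metric output.

    The 'execution time' / 'duration' / 'elapsed' patterns are guesses about the
    log format, not what the actual parse_xla_logs.py does.
    """
    pattern = re.compile(r"(execution time|duration|elapsed)", re.IGNORECASE)
    with open(path, "r", errors="replace") as f:
        for line in f:
            if pattern.search(line):
                print(line.rstrip())

if __name__ == "__main__":
    # e.g. python parse_xla_logs.py results/gemm_00881937d6d49056045c3325a12b108b.hlo.log
    parse_log(sys.argv[1])
```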