Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
235 commits
Select commit Hold shift + click to select a range
7d1b97f
Create benchmarks.yml
juliagmt-google Dec 10, 2024
86af462
Update benchmarks.yml
juliagmt-google Dec 10, 2024
97e4a38
Update benchmarks.yml
juliagmt-google Dec 10, 2024
d4f930b
Update benchmarks.yml
juliagmt-google Dec 10, 2024
e5ae47d
Update benchmarks.yml
juliagmt-google Dec 11, 2024
9042275
Update benchmarks.yml
juliagmt-google Dec 11, 2024
6c3bf18
Update benchmarks.yml
juliagmt-google Dec 11, 2024
f7eba41
Update benchmarks.yml
juliagmt-google Dec 11, 2024
310cfaa
Update benchmarks.yml
juliagmt-google Dec 11, 2024
8653ab9
Update benchmarks.yml
juliagmt-google Dec 11, 2024
7ce554f
Update benchmarks.yml
juliagmt-google Dec 11, 2024
7003c4e
Update benchmarks.yml
juliagmt-google Dec 12, 2024
44655f4
Update benchmarks.yml
juliagmt-google Dec 12, 2024
dcfcf4e
Update benchmarks.yml
juliagmt-google Dec 12, 2024
63fd777
Update benchmarks.yml
juliagmt-google Dec 12, 2024
5058ec2
Update benchmarks.yml
juliagmt-google Dec 12, 2024
0f102e1
Update benchmarks.yml
juliagmt-google Dec 12, 2024
b49282e
Update benchmarks.yml
juliagmt-google Dec 12, 2024
e891686
Update benchmarks.yml
juliagmt-google Dec 12, 2024
352234f
Update benchmarks.yml
juliagmt-google Dec 12, 2024
1cfa6ce
Update benchmarks.yml
juliagmt-google Dec 12, 2024
d528a9c
Update benchmarks.yml
juliagmt-google Dec 12, 2024
55a366e
Update benchmarks.yml
juliagmt-google Dec 12, 2024
0c1adc0
Update benchmarks.yml
juliagmt-google Dec 12, 2024
f581538
Update benchmarks.yml
juliagmt-google Dec 12, 2024
2c9fe6c
Update benchmarks.yml
juliagmt-google Dec 12, 2024
01cda16
Update benchmarks.yml
juliagmt-google Dec 12, 2024
4b8fbdb
Update benchmarks.yml
juliagmt-google Dec 12, 2024
57c1673
Update benchmarks.yml
juliagmt-google Dec 12, 2024
43bf80b
Update benchmarks.yml
juliagmt-google Dec 12, 2024
cdb1a30
Update benchmarks.yml
juliagmt-google Dec 12, 2024
620a9e1
Update benchmarks.yml
juliagmt-google Dec 12, 2024
863f263
Update benchmarks.yml
juliagmt-google Dec 12, 2024
af183a4
Update benchmarks.yml
juliagmt-google Dec 12, 2024
a0b2dd3
Update benchmarks.yml
juliagmt-google Dec 12, 2024
5a363a9
Update benchmarks.yml
juliagmt-google Dec 12, 2024
9fef073
Update benchmarks.yml
juliagmt-google Dec 12, 2024
d057d0f
Update benchmarks.yml
juliagmt-google Dec 12, 2024
176da98
Update benchmarks.yml
juliagmt-google Dec 12, 2024
724b7d7
Update benchmarks.yml
juliagmt-google Dec 12, 2024
392d152
Update benchmarks.yml
juliagmt-google Dec 12, 2024
f6bb4d4
Update benchmarks.yml
juliagmt-google Dec 12, 2024
33ce83c
Update benchmarks.yml
juliagmt-google Dec 13, 2024
341e2a1
Update benchmarks.yml
juliagmt-google Dec 13, 2024
89deed1
Update benchmarks.yml
juliagmt-google Dec 13, 2024
d56c1db
Update benchmarks.yml
juliagmt-google Dec 13, 2024
5afa062
Update benchmarks.yml
juliagmt-google Dec 13, 2024
a057dbe
Update benchmarks.yml
juliagmt-google Dec 13, 2024
6e208c5
Update benchmarks.yml
juliagmt-google Dec 13, 2024
af424ac
Update benchmarks.yml
juliagmt-google Dec 13, 2024
a05d32a
Update benchmarks.yml
juliagmt-google Dec 13, 2024
1e3cd5f
Update benchmarks.yml
juliagmt-google Dec 13, 2024
8472ccd
Update benchmarks.yml
juliagmt-google Dec 13, 2024
6fdae96
Update benchmarks.yml
juliagmt-google Dec 13, 2024
1919b0e
Update benchmarks.yml
juliagmt-google Dec 13, 2024
a76b14e
Update benchmarks.yml
juliagmt-google Dec 13, 2024
f772be5
Update benchmarks.yml
juliagmt-google Dec 13, 2024
d22041e
Update benchmarks.yml
juliagmt-google Dec 13, 2024
5051abd
Update benchmarks.yml
juliagmt-google Dec 13, 2024
81301c8
Update benchmarks.yml
juliagmt-google Dec 14, 2024
eefc513
Update benchmarks.yml
juliagmt-google Dec 16, 2024
9067033
Update benchmarks.yml
juliagmt-google Dec 16, 2024
5ccac5e
Update benchmarks.yml
juliagmt-google Dec 16, 2024
a69190b
Update benchmarks.yml
juliagmt-google Dec 16, 2024
2222254
Update benchmarks.yml
juliagmt-google Dec 16, 2024
a1550fc
Update benchmarks.yml
juliagmt-google Dec 16, 2024
d3b58db
Update benchmarks.yml
juliagmt-google Dec 16, 2024
dee2556
Update benchmarks.yml
juliagmt-google Dec 16, 2024
b2206e1
Update benchmarks.yml
juliagmt-google Dec 16, 2024
ad59911
Update benchmarks.yml
juliagmt-google Dec 16, 2024
036fce8
Update benchmarks.yml
juliagmt-google Dec 16, 2024
12362de
Create parse_xla_logs.py
juliagmt-google Dec 17, 2024
0ea713f
Update benchmarks.yml
juliagmt-google Dec 17, 2024
0161250
Update benchmarks.yml
juliagmt-google Dec 17, 2024
85d332a
Update benchmarks.yml
juliagmt-google Dec 17, 2024
d67626c
Update benchmarks.yml
juliagmt-google Dec 17, 2024
39567e4
Update benchmarks.yml
juliagmt-google Dec 17, 2024
a94f159
Update benchmarks.yml
juliagmt-google Dec 18, 2024
ff8dd3e
Update benchmarks.yml
juliagmt-google Dec 19, 2024
7e89850
Update benchmarks.yml
juliagmt-google Dec 19, 2024
97e85cf
Update benchmarks.yml
juliagmt-google Dec 19, 2024
370eb4a
Update benchmarks.yml
juliagmt-google Dec 20, 2024
8bcac86
Update benchmarks.yml
juliagmt-google Dec 23, 2024
36b2d89
Update benchmarks.yml
juliagmt-google Dec 24, 2024
536e4c7
Update benchmarks.yml
juliagmt-google Jan 14, 2025
069011e
Update benchmarks.yml
juliagmt-google Jan 14, 2025
e278006
Update benchmarks.yml
juliagmt-google Jan 14, 2025
43b24ae
Update benchmarks.yml
juliagmt-google Jan 14, 2025
87ba364
Update benchmarks.yml
juliagmt-google Jan 15, 2025
4b6ce3e
Update benchmarks.yml
juliagmt-google Jan 15, 2025
808d88e
Update benchmarks.yml
juliagmt-google Jan 16, 2025
5533e6e
Update benchmarks.yml
juliagmt-google Jan 16, 2025
dccf656
Update benchmarks.yml
juliagmt-google Jan 16, 2025
69d154e
Update benchmarks.yml
juliagmt-google Jan 17, 2025
0855903
Update benchmarks.yml
juliagmt-google Jan 17, 2025
3419350
Update benchmarks.yml
juliagmt-google Jan 17, 2025
2c1776a
Update benchmarks.yml
juliagmt-google Jan 17, 2025
84d9f47
Update benchmarks.yml
juliagmt-google Jan 17, 2025
53c5b6e
Update benchmarks.yml
juliagmt-google Jan 17, 2025
3e2f050
Update benchmarks.yml
juliagmt-google Jan 17, 2025
f9c5740
Update benchmarks.yml
juliagmt-google Jan 17, 2025
9345fd6
Update benchmarks.yml
juliagmt-google Jan 17, 2025
731ef68
Update benchmarks.yml
juliagmt-google Jan 20, 2025
783bc57
Update benchmarks.yml
juliagmt-google Jan 21, 2025
2bdbf77
Update benchmarks.yml
juliagmt-google Jan 21, 2025
3bafca0
Update benchmarks.yml
juliagmt-google Jan 21, 2025
a527c94
Update benchmarks.yml
juliagmt-google Jan 21, 2025
df63d67
Update benchmarks.yml
juliagmt-google Jan 21, 2025
54be71c
Update benchmarks.yml
juliagmt-google Jan 21, 2025
788cc88
Update benchmarks.yml
juliagmt-google Jan 21, 2025
b5c3ab7
Update benchmarks.yml
juliagmt-google Jan 21, 2025
343cfc6
Update benchmarks.yml
juliagmt-google Jan 22, 2025
fe26440
Update benchmarks.yml
juliagmt-google Jan 22, 2025
e45ccf8
Update benchmarks.yml
juliagmt-google Jan 22, 2025
f075237
Update benchmarks.yml
juliagmt-google Jan 22, 2025
f216d1c
Update benchmarks.yml
juliagmt-google Jan 22, 2025
634148e
Update benchmarks.yml
juliagmt-google Jan 23, 2025
e6223d0
Update benchmarks.yml
juliagmt-google Jan 27, 2025
e1123df
Update benchmarks.yml
juliagmt-google Jan 27, 2025
e11186e
Update benchmarks.yml
juliagmt-google Jan 27, 2025
16ab09d
Update benchmarks.yml
juliagmt-google Jan 27, 2025
57829a9
Update benchmarks.yml
juliagmt-google Jan 27, 2025
a7721cd
Update benchmarks.yml
juliagmt-google Jan 27, 2025
d15c675
Update benchmarks.yml
juliagmt-google Jan 27, 2025
278a117
Update benchmarks.yml
juliagmt-google Jan 27, 2025
a457e16
Update benchmarks.yml
juliagmt-google Jan 27, 2025
de9a7d3
Update benchmarks.yml
juliagmt-google Jan 27, 2025
eab3435
Update benchmarks.yml
juliagmt-google Jan 27, 2025
caccc3c
Update benchmarks.yml
juliagmt-google Jan 27, 2025
16b0591
Update benchmarks.yml
juliagmt-google Jan 27, 2025
1682c63
Update benchmarks.yml
juliagmt-google Jan 27, 2025
0119316
Update benchmarks.yml
juliagmt-google Jan 27, 2025
b46e56c
Update benchmarks.yml
juliagmt-google Jan 28, 2025
1541dc3
Update benchmarks.yml
juliagmt-google Jan 28, 2025
f0d2332
Update benchmarks.yml
juliagmt-google Jan 28, 2025
bca6b94
Update benchmarks.yml
juliagmt-google Jan 28, 2025
4970a53
Update benchmarks.yml
juliagmt-google Jan 28, 2025
2b0ad2a
Update benchmarks.yml
juliagmt-google Jan 28, 2025
0365726
Update benchmarks.yml
juliagmt-google Jan 28, 2025
3f7ca39
Update benchmarks.yml
juliagmt-google Jan 28, 2025
e9249ad
Update benchmarks.yml
juliagmt-google Jan 28, 2025
08076d2
Update benchmarks.yml
juliagmt-google Jan 29, 2025
fd61b43
Update benchmarks.yml
juliagmt-google Jan 29, 2025
78e75f4
Update benchmarks.yml
juliagmt-google Jan 29, 2025
8240dcc
Update benchmarks.yml
juliagmt-google Jan 29, 2025
684ec54
Update benchmarks.yml
juliagmt-google Jan 29, 2025
84faa6e
Update benchmarks.yml
juliagmt-google Jan 29, 2025
c26d947
Update benchmarks.yml
juliagmt-google Jan 29, 2025
74a717e
Update benchmarks.yml
juliagmt-google Jan 29, 2025
5fb3d04
Update benchmarks.yml
juliagmt-google Jan 29, 2025
5f03557
Update benchmarks.yml
juliagmt-google Jan 29, 2025
6b1e703
Update benchmarks.yml
juliagmt-google Jan 29, 2025
b59fa62
Update benchmarks.yml
juliagmt-google Jan 29, 2025
f5a873b
Update benchmarks.yml
juliagmt-google Jan 29, 2025
e30f672
Update benchmarks.yml
juliagmt-google Jan 30, 2025
690b880
Update benchmarks.yml
juliagmt-google Jan 31, 2025
896a9bf
Update benchmarks.yml
juliagmt-google Jan 31, 2025
e518ec3
Update benchmarks.yml
juliagmt-google Feb 7, 2025
e0f6662
Update benchmarks.yml
juliagmt-google Feb 7, 2025
6d0394e
Update benchmarks.yml
juliagmt-google Feb 10, 2025
15efd20
Update benchmarks.yml
juliagmt-google Feb 10, 2025
e1a1c2e
Update benchmarks.yml
juliagmt-google Feb 10, 2025
0e04c79
Update benchmarks.yml
juliagmt-google Feb 10, 2025
4e352fc
Update benchmarks.yml
juliagmt-google Feb 10, 2025
4f8c1e7
Update benchmarks.yml
juliagmt-google Feb 10, 2025
bf63119
Update benchmarks.yml
juliagmt-google Feb 10, 2025
5667c46
Update benchmarks.yml
juliagmt-google Feb 10, 2025
7f0a09f
Update benchmarks.yml
juliagmt-google Feb 10, 2025
3bc65f8
Update benchmarks.yml
juliagmt-google Feb 13, 2025
e6b9ec7
Update benchmarks.yml
juliagmt-google Feb 13, 2025
78cddef
Update benchmarks.yml
juliagmt-google Feb 14, 2025
f9527cf
Update benchmarks.yml
juliagmt-google Feb 14, 2025
560c96e
Update benchmarks.yml
juliagmt-google Feb 14, 2025
c673278
Update benchmarks.yml
juliagmt-google Feb 14, 2025
1f94828
Update benchmarks.yml
juliagmt-google Feb 14, 2025
3f95da7
Update benchmarks.yml
juliagmt-google Feb 14, 2025
ec601af
Update benchmarks.yml
juliagmt-google Feb 15, 2025
20fc17d
Update benchmarks.yml
juliagmt-google Feb 15, 2025
a095ac7
Update benchmarks.yml
juliagmt-google Feb 15, 2025
0c675e2
Update benchmarks.yml
juliagmt-google Feb 15, 2025
3f8fdbd
Update benchmarks.yml
juliagmt-google Feb 15, 2025
3136300
Update benchmarks.yml
juliagmt-google Feb 15, 2025
9b9830f
Update benchmarks.yml
juliagmt-google Feb 18, 2025
5805ee6
Update benchmarks.yml
juliagmt-google Feb 19, 2025
f5094ce
Update benchmarks.yml
juliagmt-google Feb 19, 2025
a4dea4b
Update benchmarks.yml
juliagmt-google Feb 19, 2025
08a8951
Update benchmarks.yml
juliagmt-google Feb 19, 2025
abda341
Update benchmarks.yml
juliagmt-google Feb 19, 2025
e5ba4b2
Update benchmarks.yml
juliagmt-google Feb 19, 2025
77c2e0a
Update benchmarks.yml
juliagmt-google Feb 19, 2025
a8c8c16
Update benchmarks.yml
juliagmt-google Feb 19, 2025
3d7c2d7
Update benchmarks.yml
juliagmt-google Feb 19, 2025
a4e9a8f
Update benchmarks.yml
juliagmt-google Feb 19, 2025
a1c684d
Update benchmarks.yml
juliagmt-google Feb 19, 2025
5ba454c
Update benchmarks.yml
juliagmt-google Feb 19, 2025
c42a41a
Update benchmarks.yml
juliagmt-google Feb 19, 2025
0ee8f54
Update benchmarks.yml
juliagmt-google Feb 21, 2025
acc23fd
Update benchmarks.yml
juliagmt-google Feb 21, 2025
f02ccef
Update benchmarks.yml
juliagmt-google Feb 21, 2025
1d4db34
Update benchmarks.yml
juliagmt-google Feb 21, 2025
46d97f9
Update benchmarks.yml
juliagmt-google Feb 21, 2025
92f23a8
Update benchmarks.yml
juliagmt-google Feb 21, 2025
8893183
Update benchmarks.yml
juliagmt-google Feb 21, 2025
1e86090
Update benchmarks.yml
juliagmt-google Feb 21, 2025
99ea45b
Update benchmarks.yml
juliagmt-google Feb 21, 2025
3e44ec7
Update benchmarks.yml
juliagmt-google Feb 21, 2025
b5d1db2
Update benchmarks.yml
juliagmt-google Feb 21, 2025
af9f592
Update benchmarks.yml
juliagmt-google Feb 21, 2025
0593aed
Update benchmarks.yml
juliagmt-google Feb 21, 2025
e44d997
Update benchmarks.yml
juliagmt-google Feb 21, 2025
ae05cde
Update benchmarks.yml
juliagmt-google Feb 21, 2025
d233727
Update benchmarks.yml
juliagmt-google Feb 21, 2025
f03cd88
Update benchmarks.yml
juliagmt-google Feb 21, 2025
8a1a755
Update benchmarks.yml
juliagmt-google Feb 21, 2025
23631ca
Update benchmarks.yml
juliagmt-google Feb 21, 2025
2e5d6fc
Update benchmarks.yml
juliagmt-google Feb 24, 2025
5a89600
Update benchmarks.yml
juliagmt-google Feb 24, 2025
5e91697
Update benchmarks.yml
juliagmt-google Feb 24, 2025
28b615a
Update benchmarks.yml
juliagmt-google Feb 24, 2025
44eca76
Update benchmarks.yml
juliagmt-google Feb 24, 2025
5805f0d
Update benchmarks.yml
juliagmt-google Feb 25, 2025
7d25041
Update benchmarks.yml
juliagmt-google Feb 25, 2025
23e21c2
Update benchmarks.yml
juliagmt-google Feb 25, 2025
f3d5432
Update benchmarks.yml
juliagmt-google Feb 25, 2025
fd3175e
Update benchmarks.yml
juliagmt-google Feb 25, 2025
0b0d0ec
Update benchmarks.yml
juliagmt-google Feb 25, 2025
9352da5
Update benchmarks.yml
juliagmt-google Feb 25, 2025
c9f2d12
Update benchmarks.yml
juliagmt-google Feb 25, 2025
9350bb1
Update benchmarks.yml
juliagmt-google Feb 26, 2025
7bc7b08
Update benchmarks.yml
juliagmt-google Mar 4, 2025
62a2ddd
Update benchmarks.yml
juliagmt-google Mar 4, 2025
fd4f423
Update benchmarks.yml
juliagmt-google Mar 7, 2025
2f1e208
Update benchmarks.yml
juliagmt-google Mar 10, 2025
eb5c971
Update benchmarks.yml
juliagmt-google Mar 10, 2025
409d207
Update benchmarks.yml
juliagmt-google Mar 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 231 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
# Benchmarks workflow. The only active steps check out openxla/xla and run the
# "Wait For Connection" action; every build/benchmark step below is currently
# commented out.
name: Benchmarks

# Triggers: pull requests targeting `main`, plus manual dispatch.
on:
pull_request:
branches:
- main
workflow_dispatch:
inputs:
# Manual-dispatch choice ('yes'/'no', default 'no') that is forwarded to the
# "Wait For Connection" step as `halt-dispatch-input`.
halt-for-connection:
description: 'Should this workflow run wait for a remote connection?'
type: choice
required: true
default: 'no'
options:
- 'yes'
- 'no'

jobs:
# Single job; runs inside the container image named below on the runner label
# given in `runs-on`.
build-xla-gpu-and-test:
runs-on: "linux-x86-g2-48-l4-4gpu" #linux-x86-n2-16 # Use a GPU-enabled runner
container:
image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
options: --gpus all --privileged # Might need privileged mode, use with caution

steps:
# Checks out the upstream openxla/xla repository; the checkout action is
# pinned to a commit SHA (annotated as v4.1.1).
- name: Checkout XLA
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
repository: openxla/xla
# - name: Checkout repository
# uses: actions/checkout@v3
# with:
# repository: juliagmt-google/xla
# path: xla

# - name: Print machine specs
# run: |
# nvidia-smi
# free -h # Memory information
# df -h # Disk space information
# uname -a # Kernel information

# - name: Set WORKSPACE_DIR
# env:
# WORKSPACE_DIR: ${{ github.workspace }}
# run: |
# echo "WORKSPACE_DIR is: $WORKSPACE_DIR"

# - name: Create results directory
# run: |
# mkdir -p results
# ls

# - name: Configure XLA for GPU backend
# run: |
# cd xla
# ./configure.py --backend CUDA --nccl

# - name: Set TF_CPP_MAX_VLOG_LEVEL
# env:
# TF_CPP_MAX_VLOG_LEVEL: 1
# run: |
# echo "TF_CPP_MAX_VLOG_LEVEL is: $TF_CPP_MAX_VLOG_LEVEL"

# Receives the `halt-for-connection` dispatch input.
# NOTE(review): this action is referenced by branch (`@main`) while the
# checkout above is pinned to a SHA - confirm whether it should be pinned too.
- name: Wait For Connection
uses: google-ml-infra/actions/ci_connection@main
with:
halt-dispatch-input: ${{ inputs.halt-for-connection }}

# - name: Build hlo_runner_main
# run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main

# # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
# - name: Run hlo_opt and generate xspace.pb
# run: |
# ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=$WORKSPACE_DIR/results/xspace.pb $WORKSPACE_DIR/xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo

# - name: Set XSPACE_PATH
# env:
# XSPACE_PATH: ${{ github.workspace }}/results/xspace.pb
# run: |
# echo "XSPACE_PATH is: $XSPACE_PATH"

# - name: Compute the cost of gpu_hlo_pass.hlo
# run: |
# bazel run //xla/tools:compute_cost -- --input=$WORKSPACE_DIR/xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo --format=hlo --gpu

# - name: Checkout juliagmt-google/xla
# uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
# with:
# repository: juliagmt-google/xla
# path: juliagmt-google-xla

# - name: Compute the device stats of gpu_hlo_pass.hlo
# run: |
# echo "XSPACE_PATH is: $XSPACE_PATH"
# bazel run //xla/tools:get_device_stats_main -- --input=$XSPACE_PATH
# working-directory: juliagmt-google-xla

# - name: Upload XSpace
# uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
# with:
# name: gpu-xla-benchmarks-xspace
# path: $WORKSPACE_DIR/results

# - name: Print machine specs
# run: |
# lscpu
# free -h # Memory information
# df -h # Disk space information
# uname -a # Kernel information

# - name: Create results directory
# working-directory: xla
# run: mkdir results

# - name: Wait For Connection
# uses: google-ml-infra/actions/ci_connection@main
# with:
# halt-dispatch-input: ${{ inputs.halt-for-connection }}

# - name: Configure XLA
# working-directory: xla
# run: |
# cd ..
# ls
# ./configure.py --backend CUDA --nccl

# - name: Set TF_CPP_MAX_VLOG_LEVEL
# working-directory: xla
# run: echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV # Use GITHUB_ENV to persist across steps

# - name: Build hlo_runner_main
# working-directory: xla
# run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main

# - name: Wait For Connection
# uses: google-ml-infra/actions/ci_connection@main
# with:
# halt-dispatch-input: ${{ inputs.halt-for-connection }}

# - name: Wait For Connection
# uses: google-ml-infra/actions/ci_connection@main
# with:
# halt-dispatch-input: ${{ inputs.halt-for-connection }}

# - name: Run an HLO file
# working-directory: xla
# run: |
# cd ..
# ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=xla/results/xspace.pb xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo

# - name: Get Device Stats
# working-directory: xla
# run: |
# cd ..
# PWD=$(pwd)
# bazel run //xla/tools:get_device_stats_main -- --input=$PWD/xla/results/xspace.pb

# - name: Wait For Connection
# uses: google-ml-infra/actions/ci_connection@main
# with:
# halt-dispatch-input: ${{ inputs.halt-for-connection }}

# - name: Download parse_xla_logs.py
# working-directory: xla
# run: wget https://raw.githubusercontent.com/juliagmt-google/xla/main/.github/workflows/parse_xla_logs.py

# - name: Parse XLA logs
# working-directory: xla
# run: python parse_xla_logs.py results/gpu_hlo_backend.log

# - name: Upload Results
# uses: actions/upload-artifact@v4
# with:
# name: gpu-xla-benchmarks
# path: xla/results


# jobs:
# Tests:
# strategy:
# # Don't fail fast - want to see results for all builds even if one fails.
# fail-fast: false
# matrix:
# job_info:
# - os: "linux-x86-g2-48-l4-4gpu"
# container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest",
# pretty_name: "Linux X86 runner with 4 NVIDIA L4 GPUs"
# # Expect more GPU types in the future.
# name: ${{ matrix.job_info.pretty_name }}
# runs-on: ${{ matrix.job_info.os }}
# container: ${{ matrix.job_info.container }}
# defaults:
# run:
# shell: bash
# timeout-minutes: 360
# steps:
# - name: Checkout XLA
# uses: actions/checkout@v4 # v4.1.1
# with:
# repository: openxla/xla
# - name: Wait For Connection
# uses: google-ml-infra/actions/ci_connection@main
# with:
# halt-dispatch-input: ${{ inputs.halt-for-connection }}
# - name: Print machine specs
# run: |
# nvidia-smi
# free -h # Memory information
# df -h # Disk space information
# uname -a # Kernel information
# - name: Create results directory
# run: mkdir results
# - name: Configure XLA for GPU backend
# run: ./configure.py --backend CUDA --nccl
# - name: Set TF_CPP_MAX_VLOG_LEVEL
# env:
# TF_CPP_MAX_VLOG_LEVEL: 1
# run: |
# echo "TF_CPP_MAX_VLOG_LEVEL is: $TF_CPP_MAX_VLOG_LEVEL"
# - name: Build hlo_runner_main
# run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
# # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
# - name: Run xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
# run: ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=xla/results/xspace.pbtxt xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
# - name: Upload XSpace
# uses: actions/upload-artifact@v4 # v4.1.1
# with:
# name: gpu-xla-benchmarks-xspace
# path: xla/results/xspace.pbtxt
120 changes: 120 additions & 0 deletions .github/workflows/parse_xla_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import re
import sys

class BenchmarkResult:
    """Plain container for the metrics gathered from one benchmark run.

    Every field starts at an empty/zero default; the log parser in this file
    fills in the ones it can extract from the XLA log output.
    """

    def __init__(self) -> None:
        # Identification / provenance of the benchmark.
        self.name: str = ""
        self.flavor: str = ""
        self.description: str = ""
        self.xla_flags: str = ""
        self.build_flags: list = []
        self.source_uri: str = ""
        self.device_name: str = ""

        # Run-level measurements.
        self.time_us: float = 0.0
        self.last_average_loss: float = 0.0
        self.memory_read_written: int = 0
        self.buffer_allocation: list = []

        # Compilation phase timings, in microseconds.
        self.nvptx_compilation_time_us: int = 0
        self.hlo_passes_time_us: int = 0
        self.run_backend_time_us: int = 0

        # Device-side timings, in microseconds.
        self.device_time_us: float = 0.0
        self.device_memcpy_time_us: float = 0.0

        # Miscellaneous.
        self.xprof_session_id: str = ""
        self.error: str = ""
        self.custom_metrics: list = []

class LogParser:
    """Extracts compilation and memory statistics from XLA log output.

    The parser scans a log file line by line (as bytes), recognises a fixed
    set of marker substrings, and accumulates the parsed values into a
    ``BenchmarkResult``. PTX compilation time is tracked per thread in
    ``compilation_time`` and summed into the result at the end of
    ``parse_log_file``.
    """

    # Units defined in google3/third_party/tensorflow/tsl/platform/numbers.cc
    # Maps a unit suffix to its length in seconds.
    _TIME_UNITS = {"us": 1e-6, "ms": 1e-3, "s": 1, "min": 60, "h": 3600}
    _TIME_REGEXP = re.compile(r"time: (\d+\.?\d*) (%s)" % "|".join(_TIME_UNITS))
    # Binary size prefixes in increasing order: Ki, Mi, Gi, Ti, Pi, Ei.
    _SIZE_UNITS = "KMGTPE"
    _SIZE_REGEXP = re.compile(r"(\d+\.?\d*)([%s]i)?B" % _SIZE_UNITS)

    def __init__(self):
        # Accumulated CompileToPtx time in microseconds, keyed by the thread
        # id (bytes) taken from the log line.
        self.compilation_time = {}

    def _ParseTimeFromLog(self, time_str: bytes, position: int = -1) -> int:
        """Returns the time in microseconds parsed from XLA logs.

        Args:
          time_str: a log line containing ``time: <number> <unit>``.
          position: line number, used only in the failure message.

        Raises:
          AssertionError: if no time can be found in ``time_str``.
        """
        match = self._TIME_REGEXP.search(time_str.decode())
        assert match, "Unable to parse the time on log line %d" % position
        # Unit table is in seconds; scale to microseconds.
        exp_ = self._TIME_UNITS[match.group(2)] * 1e6
        return int(float(match.group(1)) * exp_)

    def _ParseBytesFromLog(self, size_str: bytes, position: int = -1) -> int:
        """Returns the size in bytes parsed from XLA logs.

        Args:
          size_str: a log line containing a size such as ``12B`` or ``1.5KiB``.
          position: line number, used only in the failure message.

        Raises:
          AssertionError: if no size can be found in ``size_str``.
        """
        match = self._SIZE_REGEXP.search(size_str.decode())
        assert match, "Unable to parse the size on log line %d" % position
        # "-" is not in _SIZE_UNITS, so find() yields -1 and the multiplier
        # becomes 2**0 for plain bytes.
        prefix = match.group(2) or "-"
        exp_ = pow(2, 10 * (self._SIZE_UNITS.find(prefix[0]) + 1))
        return int(float(match.group(1)) * exp_)

    def _ParseLogLine(self, result, line, position) -> None:
        """Parses a single XLA log line and updates the BenchmarkResult.

        It looks for certain text patterns and updates the result with
        compilation stats and Xprof session id. Lines that match none of the
        patterns are ignored.

        Args:
          result: a benchmark result object to be updated
          line: a single line of benchmark log output (bytes)
          position: line number (for debugging)

        Raises:
          AssertionError: if a recognised line does not contain the expected
            value.
        """
        # Log output generated by --vmodule=nvptx_compiler=1
        if b"NVPTXCompiler::CompileTargetBinary - CompileToPtx" in line:
            # Third whitespace-separated field of the log line is used as the
            # thread id.
            thread_id = re.split(rb"\s+", line)[2]
            # Start from 0 for a thread seen for the first time; indexing the
            # dict directly raised KeyError on the first such line.
            self.compilation_time[thread_id] = self.compilation_time.get(
                thread_id, 0
            ) + self._ParseTimeFromLog(line, position)
            return

        # Log output generated by --vmodule=gpu_compiler=1
        if b"HLO memory read+written:" in line:
            result.memory_read_written += self._ParseBytesFromLog(line, position)
            return
        if b"GpuCompiler::RunHloPasses for" in line:
            result.hlo_passes_time_us += self._ParseTimeFromLog(line, position)
            return
        if b"GpuCompiler::RunBackend for" in line:
            result.run_backend_time_us += self._ParseTimeFromLog(line, position)
            return

        # Log output generated by --vmodule=bfc_allocator=2
        if b"New Peak memory usage" in line and b"for GPU" in line:
            match = re.search(rb"(\d+) bytes", line)
            assert match, "Unable to parse the size on log line %d" % position
            alloc_size = int(match.group(1))
            # Keep only the largest peak seen so far, as a one-element list
            # updated in place.
            if alloc_size > max(result.buffer_allocation, default=0):
                result.buffer_allocation[:] = [alloc_size]
            return

        # Log output generated by --xprof_end_2_end_upload
        if b"XprofResponse uploaded to http://xprof/" in line:
            match = re.search(rb"session_id=([\w\-]+)", line)
            assert match, "Unable to parse the XProf link"
            # The line is bytes; decode so the field stays a str like its
            # default value.
            result.xprof_session_id = match.group(1).decode()
            return

    def parse_log_file(self, log_file_path):
        """Parses an XLA log file and returns the collected BenchmarkResult.

        Per-thread compilation times are reset at the start of each call so
        that a reused parser does not carry totals over from an earlier file.

        Args:
          log_file_path: path of the log file to read.

        Returns:
          A new BenchmarkResult populated from the log.
        """
        self.compilation_time.clear()
        result = BenchmarkResult()
        with open(log_file_path, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self._ParseLogLine(result, line, line_number)

        # Update compilation time (total across threads) in the result object
        result.nvptx_compilation_time_us += sum(self.compilation_time.values())
        return result

if __name__ == "__main__":
    # Command-line entry point: parse one XLA log file and print the
    # extracted compilation and memory statistics.
    if len(sys.argv) != 2:
        # Usage errors go to stderr and name this script's actual file name.
        print("Usage: python parse_xla_logs.py <log_file_path>", file=sys.stderr)
        sys.exit(1)

    log_file_path = sys.argv[1]

    parser = LogParser()
    result = parser.parse_log_file(log_file_path)

    # Access results
    print(f"Memory Read+Written: {result.memory_read_written} bytes")
    print(f"NVPTX Compilation Time: {result.nvptx_compilation_time_us} us")
    print(f"HLO Passes Time: {result.hlo_passes_time_us} us")
    print(f"Run Backend Time: {result.run_backend_time_us} us")
    # buffer_allocation is a list (empty, or holding the single peak value).
    print(f"Buffer Allocation: {result.buffer_allocation} bytes")
Loading