@@ -23,16 +23,16 @@ jobs:
2323 options : --gpus all --privileged # Might need privileged mode, use with caution
2424
2525 steps :
# Check out the upstream XLA repository into the `xla` subdirectory of the
# workspace; later steps use paths relative to that directory.
- name: Checkout XLA
  uses: actions/checkout@v3
  with:
    repository: openxla/xla  # Replace with your fork if needed
    path: xla
# Alternative: check out a personal fork instead of upstream.
# - name: Checkout repository
#   uses: actions/checkout@v3
#   with:
#     repository: juliagmt-google/xla
#     path: xla
3636
3737 # - name: Wait For Connection
3838 # uses: google-ml-infra/actions/ci_connection@main
@@ -45,15 +45,16 @@ jobs:
4545 free -h # Memory information
4646 df -h # Disk space information
4747 uname -a # Kernel information
48+
# Create the directory that collects benchmark artifacts inside the checkout.
# `-p` keeps the step from failing when the directory already exists.
- name: Create results directory
  working-directory: xla
  run: mkdir -p results
4852
# Install the Python interpreter used by the following steps.
- name: Set up Python 3.10  # Choose your desired Python version
  uses: actions/setup-python@v4
  with:
    # Quoted so YAML keeps the string "3.10" instead of parsing the float 3.1.
    python-version: '3.10'
5357
54- # - name: Install pip, setuptools, and wheel
55- # run: python -m pip install --upgrade pip setuptools wheel
56-
5758 - name : Create and activate virtual environment
5859 shell : bash # Force the use of bash
5960 run : |
@@ -67,11 +68,48 @@ jobs:
6768
# Run the flax_2b end-to-end benchmark script inside the virtual environment,
# scrape the TTFT / E2E Latency / TPOT figures from its output into
# metrics.json, and copy that file into the checkout's results directory.
- name: Run run.sh for E2E benchmarks flax_2b (within venv)
  working-directory: xla/xla/backends/cpu/benchmarks/e2e/gemma2/flax_2b
  timeout-minutes: 30
  shell: bash
  run: |
    set -eo pipefail
    source ../../../../../venv/bin/activate

    # Stream the benchmark output to the job log while keeping a copy on disk
    # for parsing. With pipefail a failing run.sh still fails the step, but
    # whatever it printed has already reached the log.
    benchmark_log="$(mktemp)"
    bash run.sh | tee "$benchmark_log"

    # Extract metrics using Python and regex.
    # The heredoc delimiter is quoted so the shell passes the script through
    # verbatim, and the benchmark output is read from a file instead of being
    # spliced into the Python source (quotes or backslashes in the output
    # would otherwise corrupt the script).
    python - "$benchmark_log" << 'EOF'
    import json
    import os
    import re
    import sys

    with open(sys.argv[1], encoding="utf-8", errors="replace") as log_file:
        text = log_file.read()

    # Optional whitespace is tolerated around the colon after each label.
    ttft_pattern = r"TTFT\s*:\s*([\d.]+) ms ± ([\d.]+)%"
    e2e_latency_pattern = r"E2E Latency\s*:\s*([\d.]+) ms ± ([\d.]+)%"
    tpot_pattern = r"TPOT\s*:\s*([\d.]+) ms"

    ttft_match = re.search(ttft_pattern, text)
    e2e_latency_match = re.search(e2e_latency_pattern, text)
    tpot_match = re.search(tpot_pattern, text)

    # Metrics that were not found in the output are recorded as null.
    metrics = {
        "TTFT": {
            "value": ttft_match.group(1) if ttft_match else None,
            "std_dev": ttft_match.group(2) if ttft_match else None,
        },
        "E2E Latency": {
            "value": e2e_latency_match.group(1) if e2e_latency_match else None,
            "std_dev": e2e_latency_match.group(2) if e2e_latency_match else None,
        },
        "TPOT": {"value": tpot_match.group(1) if tpot_match else None},
    }

    with open("metrics.json", "w") as f:
        json.dump(metrics, f, indent=4)

    # Publish the metrics as a step output through the GITHUB_OUTPUT file
    # (the `::set-output` workflow command is deprecated). json.dumps emits a
    # single line, so the simple `name=value` form is sufficient.
    # NOTE(review): this step has no `id`, so other steps cannot reference the
    # output until one is added.
    github_output = os.environ.get("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a", encoding="utf-8") as out:
            out.write(f"metrics={json.dumps(metrics)}\n")
    EOF

    # Copy the metrics.json file to the results directory. The destination is
    # anchored at the workspace root because this step runs several levels
    # below the checkout, where a relative `xla/results/` does not exist.
    mkdir -p "$GITHUB_WORKSPACE/xla/results"
    cp metrics.json "$GITHUB_WORKSPACE/xla/results/"
107+
108+
# Invoke the ci_connection action, forwarding the workflow's
# `halt-for-connection` input to it.
# NOTE(review): presumably pauses the job so someone can attach to the
# runner; confirm against the action's documentation.
- name: Wait For Connection
  uses: google-ml-infra/actions/ci_connection@main
  with:
    halt-dispatch-input: ${{ inputs.halt-for-connection }}
75113 # - name: Get GPU spec
76114 # working-directory: xla
77115 # continue-on-error: true
@@ -137,11 +175,11 @@ jobs:
137175 # working-directory: xla
138176 # run: python parse_xla_logs.py results/gpu_hlo_backend.log
139177
# Publish everything under xla/results as a workflow artifact named
# `gpu-xla-benchmarks`.
- name: Upload Results
  uses: actions/upload-artifact@v4
  with:
    name: gpu-xla-benchmarks
    path: xla/results
145183 # # jax-build-and-test:
146184 # # runs-on: linux-x86-g2-48-l4-4gpu # Use a GPU-enabled runner
147185 # # container:
0 commit comments