Skip to content

Commit 26b2ad8

Browse files
committed
Add benchmark step
1 parent 73d83c6 commit 26b2ad8

1 file changed

Lines changed: 85 additions & 0 deletions

File tree

.github/workflows/run-gpu-join.yml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,88 @@ jobs:
160160
if: ${{ always() && matrix.run_python_gpu }}
161161
run: |
162162
docker compose down
163+
164+
- name: Benchmark GPU tests
165+
if: matrix.run_python_gpu
166+
run: |
167+
cat << 'EOF' > benchmark.py
168+
from huggingface_hub import snapshot_download
169+
import os
170+
import time
171+
from tqdm import tqdm
172+
import sedonadb
173+
174+
# 1. Download Dataset
# Only the two tables the benchmark query reads are fetched from the dataset repo.
print("Downloading dataset...")
benchmark_tables = ("zone", "trip")
snapshot_download(
    repo_id="apache-sedona/spatialbench",
    repo_type="dataset",
    local_dir="hf-data",
    allow_patterns=[f"v0.1.0/sf1/{name}/*" for name in benchmark_tables],
)

# 2. Setup Sedona Context
ctx = sedonadb.connect()
ctx.options.memory_limit = "unlimited"

# Register each downloaded Parquet directory as an external table named after it.
for name in benchmark_tables:
    ctx.sql(
        f"CREATE EXTERNAL TABLE {name} STORED AS PARQUET "
        f"LOCATION 'hf-data/v0.1.0/sf1/{name}/'"
    )
191+
192+
def run_benchmark(ctx, runs=6):
    """Time the cross-zone trip join in CPU and GPU mode and print a summary.

    For each mode the same query is executed ``runs`` times. The first
    execution of every mode is a warmup and is excluded from the average.

    Args:
        ctx: Context object whose ``sql(text)`` method returns a result with
            a ``show()`` method; the ``trip`` and ``zone`` tables must
            already be registered on it.
        runs: Number of executions per mode, including the warmup run.
            Must be at least 2 so that at least one timing is measured.

    Raises:
        ValueError: If ``runs`` is less than 2.
    """
    # Fail before any query runs: with fewer than two runs there is nothing
    # left to average once the warmup is discarded.
    if runs < 2:
        raise ValueError(
            f"runs must be at least 2 (one warmup plus one measured run), got {runs}"
        )

    query = """
    SELECT COUNT(*) AS cross_zone_trip_count
    FROM trip t
    JOIN zone pickup_zone
    ON ST_Within(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(pickup_zone.z_boundary))
    JOIN zone dropoff_zone
    ON ST_Within(ST_GeomFromWKB(t.t_dropoffloc), ST_GeomFromWKB(dropoff_zone.z_boundary))
    WHERE pickup_zone.z_zonekey != dropoff_zone.z_zonekey
    """

    # (label, value for gpu.enable, value for datafusion.execution.batch_size)
    modes = [("CPU", "false", "8192"), ("GPU", "true", "2000000")]
    averages = {}

    for mode_name, gpu_flag, batch_size in modes:
        print(f"\n🚀 Running {mode_name} Benchmark...")
        ctx.sql(f"SET gpu.enable = {gpu_flag}")
        ctx.sql(f"SET datafusion.execution.batch_size = {batch_size}")

        execution_times = []
        for i in tqdm(range(runs), desc=f"{mode_name} Progress"):
            # perf_counter is monotonic, so a wall-clock adjustment during
            # the run cannot distort the measured interval.
            start_time = time.perf_counter()
            result = ctx.sql(query)
            result.show()  # Forces execution
            elapsed = time.perf_counter() - start_time

            if i > 0:  # Skip warmup
                execution_times.append(elapsed)
                print(f"Run {i}: {elapsed:.4f}s")

        averages[mode_name] = sum(execution_times) / len(execution_times)

    # Final Summary Output
    print("\n" + "=" * 30)
    print("📊 BENCHMARK RESULTS")
    print("=" * 30)
    cpu_avg = averages["CPU"]
    gpu_avg = averages["GPU"]
    speedup = cpu_avg / gpu_avg if gpu_avg > 0 else 0
    print(f"CPU Average: {cpu_avg:.4f}s")
    print(f"GPU Average: {gpu_avg:.4f}s")
    # 'f' (fixed-point) is the float presentation type; the previous 'x' is
    # integer-only and raised ValueError here.
    print(f"Speedup: {speedup:.2f}x")
    print("=" * 30)
238+
239+
if __name__ == "__main__":
240+
run_benchmark(ctx)
241+
EOF
242+
243+
# Install additional dependencies required by the benchmark script
244+
pip install huggingface_hub tqdm
245+
246+
# Execute the script
247+
python benchmark.py

0 commit comments

Comments
 (0)