Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
5af1ae9
Add bencher yml file and bencher python script
johnpalsberg Nov 17, 2025
ed6fdf6
Nothing changed - for bencher
johnpalsberg Nov 17, 2025
ede1936
update access permissions
johnpalsberg Nov 17, 2025
a8b2140
add fgfa to path
johnpalsberg Nov 17, 2025
88e2a19
fix slug
johnpalsberg Nov 17, 2025
f7550c9
Fix json output formatting
johnpalsberg Nov 17, 2025
e268d56
Fix json format again
johnpalsberg Nov 17, 2025
a1764ef
Testing bencher connection
johnpalsberg Nov 17, 2025
e21338f
fix json output
johnpalsberg Nov 17, 2025
d49203f
json fix again
johnpalsberg Nov 17, 2025
d6d17f1
json fix again
johnpalsberg Nov 17, 2025
8d0c3ce
Change Bencher measurement to file-size
johnpalsberg Nov 24, 2025
8c2aa22
Fix the threshold measurement
johnpalsberg Nov 24, 2025
a0db4d7
Add Bencher latency benchmark
johnpalsberg Nov 24, 2025
1fe50e6
Fix latency benchmark bug
johnpalsberg Nov 24, 2025
f8b8137
Revert file size test find bug
johnpalsberg Nov 24, 2025
157e42d
Fix latency benchmark bug
johnpalsberg Nov 24, 2025
c84d5f5
More detailed GitHub debug info enabled
johnpalsberg Nov 24, 2025
da981e7
Test
johnpalsberg Nov 24, 2025
ebce89a
Track One test GFA file for now
johnpalsberg Nov 24, 2025
cae762b
Fix file size benchmark bug
johnpalsberg Nov 24, 2025
b10a349
Clean up latency benchmark
johnpalsberg Nov 24, 2025
66773cb
Fix threshold problem
johnpalsberg Nov 24, 2025
558b004
Another threshold attempted fix
johnpalsberg Nov 24, 2025
3a8d112
more threshold fixes
johnpalsberg Nov 24, 2025
87419ef
Use curl to download test file
johnpalsberg Nov 27, 2025
fe5c908
untrack test file
johnpalsberg Nov 27, 2025
d8248d1
Make each graph plot multiple points
johnpalsberg Nov 27, 2025
6210100
fix
johnpalsberg Nov 27, 2025
370e08c
Fix yml file
johnpalsberg Nov 27, 2025
17ac6e9
fix jsons
johnpalsberg Nov 27, 2025
dd5b36e
Test thresholds
johnpalsberg Nov 27, 2025
c8d19fb
Change file size reporting to KB instead of bytes
johnpalsberg Nov 27, 2025
a3fc886
Fix elided lifetime Clippy errors
johnpalsberg Dec 15, 2025
52e9cec
Merge branch 'main' into john-benchmark
johnpalsberg Dec 15, 2025
9231143
Modify in accordance to PR comments
johnpalsberg Jan 9, 2026
c807a2a
Complete python test files that group tests by gfa size
johnpalsberg Feb 9, 2026
8d4334e
Add additional functionality to new test files, and improve Bencher c…
johnpalsberg Feb 9, 2026
025da79
Fix Bencher commands
johnpalsberg Feb 19, 2026
d2b3330
Try to make fgfa work with Github CI
johnpalsberg Feb 23, 2026
a3d2043
Try to make fgfa work with Github CI #2
johnpalsberg Feb 23, 2026
434c2e8
Try to make fgfa work with Github CI #3
johnpalsberg Feb 23, 2026
a895bbd
Try to make fgfa work with Github CI #4
johnpalsberg Feb 23, 2026
3c6bb52
Try to make fgfa work with Github CI #5
johnpalsberg Feb 23, 2026
137344c
Integrate building into workflow
johnpalsberg Feb 23, 2026
9f0c8a9
Fix benchmark tags
johnpalsberg Feb 23, 2026
1a29b01
Fix more tags
johnpalsberg Feb 23, 2026
8baaff8
Testing Bencher threshold
johnpalsberg Feb 23, 2026
e8a7f68
Add more commands to the latency benchmark
johnpalsberg Feb 23, 2026
7c87155
Add normalization support to the latency benchmark
johnpalsberg Feb 24, 2026
a6060e2
Combine latency and filesize tests into one file
johnpalsberg Feb 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/bencher.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Continuous benchmarking workflow: build FlatGFA, fetch test data, and
# report benchmark results to Bencher on every push.
on:
  push:

jobs:
  benchmark_base_branch:
    name: Continuous Benchmarking with Bencher
    permissions:
      contents: read   # needed to check out the repository
      checks: write    # needed so Bencher can post check results
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Installs the `bencher` CLI used below.
      - uses: bencherdev/bencher@main
      - name: Build release binary
        run: cargo build --release
      # The benchmark script invokes `fgfa` by bare name, so the release
      # target directory must be on PATH.
      - name: Add fgfa to PATH
        run: echo "$PWD/target/release" >> $GITHUB_PATH
      - name: Fetch test data
        run: make fetch

      # FlatGFA Benchmarks
      # NOTE(review): benchmark_web.py reports latency as well as file size,
      # even though this step name mentions only file size — confirm intent.
      - name: Track file size benchmarks with Bencher
        run: |
          bencher run \
          --project flatgfa \
          --token '${{ secrets.BENCHER_API_TOKEN }}' \
          --branch main \
          --testbed ubuntu-latest \
          --err \
          --adapter json \
          --github-actions '${{ secrets.GITHUB_TOKEN }}' \
          python bench/benchmark_web.py mini_bencher 10 del
2 changes: 0 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ name: build

on:
push:
branches:
- main
pull_request:
branches:
- main
Expand Down
208 changes: 208 additions & 0 deletions bench/benchmark_web.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import sys
import os
import json
import subprocess
from pathlib import Path
import time
import tomllib
import gzip
import shutil

# Parse the GFA URLs from graphs.toml
with open("bench/graphs.toml", "rb") as f:
    toml_graphs = tomllib.load(f)

# URL tables keyed by graph name, one per data source in graphs.toml.
hprc_dict = dict(toml_graphs["hprc"])
test_dict = dict(toml_graphs["test"])
gont_dict = dict(toml_graphs["1000gont"])

# Test-file sets, roughly ordered from smallest ("smoke") to largest ("big").
smoke_files = [test_dict["k"]]
mini_files = [test_dict["lpa"], test_dict["chr6c4"], hprc_dict["chrM"]]
med_files = [hprc_dict["chr20"], hprc_dict["chrX"], gont_dict["chr16"]]
big_files = [hprc_dict["chrY"], hprc_dict["chr1"], hprc_dict["chr10"]]

# Name of the temporary FlatGFA output file used for size measurement.
results = "filesize_benchmark.txt"

# Download a GFA file from the internet
def download_file(target_name, web_file):
    """Download a GFA file to ``target_name`` unless it already exists.

    URLs whose name contains "gfa.gz" are fetched to a temporary ``.gz``
    file, decompressed into ``target_name``, and the archive is removed.
    Plain URLs are downloaded directly with curl.
    """
    # Existing files are reused so repeated runs skip the download.
    if Path(target_name).exists():
        return

    if "gfa.gz" in web_file:
        temp_name = f"{target_name}.gz"
        subprocess.run(["curl", "-o", temp_name, web_file], check=True)
        with gzip.open(temp_name, "rb") as f_in, open(target_name, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        # os.remove instead of shelling out to `rm -rf` for a single file.
        os.remove(temp_name)
    else:
        subprocess.run(["curl", "-o", target_name, web_file], check=True)

# Run a single test
def test(command, test_file_name, num_iter):
    """Return the average wall-clock time in milliseconds of running one
    fgfa subcommand on ``test_file_name`` over ``num_iter`` iterations.

    ``command`` must be "extract", "chop", or "depth"; any other value
    returns 0.0 without running anything (same as the original fallthrough).
    """
    # CLI arguments per supported subcommand — the original duplicated the
    # entire timing loop once per command.
    command_args = {
        "extract": ["extract", "-n", "3", "-c", "3"],
        "chop": ["chop", "-c", "3", "-l"],
        "depth": ["depth"],
    }
    args = command_args.get(command)
    if args is None:
        return 0.0

    # perf_counter is monotonic and higher-resolution than time.time.
    start_time = time.perf_counter()
    for _ in range(num_iter):
        subprocess.run(["fgfa", "-I", test_file_name, *args],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=True)
    end_time = time.perf_counter()
    return ((end_time - start_time) * 1000) / num_iter

# Run the latency benchmark across all test files
def benchmark(test_config):
    """Run the latency and file-size benchmarks over one set of GFA files.

    Optional command-line arguments (read from ``sys.argv``):
      * argv[2] — iteration-count override (any integer),
      * argv[3] — "del" to delete downloaded test files afterwards,
      * argv[4] — "norm" to record new normalization factors.

    Returns a tuple ``(latency, filesize_kb)`` where ``latency`` is the
    harmonic mean of the three normalized command timings (1.0 when new
    normalization factors were just written) and ``filesize_kb`` is the
    average FlatGFA output size in kilobytes.
    """
    del_cond = ""
    norm_cond = ""
    iter_count = -1

    # Read command-line arguments
    if len(sys.argv) >= 3:
        iter_count = int(sys.argv[2])  # Can be any integer
    if len(sys.argv) >= 4:
        del_cond = sys.argv[3]  # Can be "del", "_", or not provided
    if len(sys.argv) >= 5:
        norm_cond = sys.argv[4]  # Can be "norm", or not provided

    # Choose test file set and its default iteration count.
    if "smoke" in test_config:
        test_files, num_iter = smoke_files, 2
    elif "mini" in test_config:
        test_files, num_iter = mini_files, 10
    elif "med" in test_config:
        test_files, num_iter = med_files, 5
    elif "big" in test_config:
        test_files, num_iter = big_files, 2
    else:
        raise ValueError("Incorrect test config provided")

    # An explicit iteration count overrides the per-set default.
    if iter_count != -1:
        num_iter = iter_count

    extract_time = 0.0
    chop_time = 0.0
    depth_time = 0.0
    size_bytes_avg = 0

    # Run a test for each file in the set
    for i, file in enumerate(test_files):
        test_file_name = f"tests/{test_config}_{i}.gfa"
        download_file(test_file_name, file)
        subprocess.run(["fgfa", "-I", test_file_name, "-o", results],
                       check=True)
        size_bytes_avg += os.path.getsize(results)
        extract_time += test("extract", test_file_name, num_iter)
        chop_time += test("chop", test_file_name, num_iter)
        depth_time += test("depth", test_file_name, num_iter)
        os.remove(results)

        # Delete test files if flag set
        if del_cond == "del":
            os.remove(test_file_name)

    # Average output size in KB. The original returned raw *total bytes*
    # in the "norm" branch but average KB otherwise — now consistent.
    size_kb_avg = size_bytes_avg / len(test_files) / 1000.0

    if norm_cond == "norm":
        # Write new normalization values
        with open("bench/normalization.toml", "w") as f:
            f.write("[normalization_factors]\n")
            f.write(f"extract = {extract_time}\n")
            f.write(f"chop = {chop_time}\n")
            f.write(f"depth = {depth_time}\n")
        return (1.0, size_kb_avg)

    # Read normalization values
    with open("bench/normalization.toml", "rb") as f:
        data = tomllib.load(f)

    # Normalize values
    extract_time /= data["normalization_factors"]["extract"]
    chop_time /= data["normalization_factors"]["chop"]
    depth_time /= data["normalization_factors"]["depth"]

    # Harmonic mean of the three normalized timings.
    latency = 3 / ((1 / extract_time) + (1 / chop_time) + (1 / depth_time))
    return (latency, size_kb_avg)

# Read the desired test file set from command-line input.
if len(sys.argv) < 2:
    raise ValueError("No arguments provided")
test_config = sys.argv[1]  # "smoke", "mini", "med", or "big" (plus flags)

latency_ms, filesize_kb = benchmark(test_config)


# Output the benchmark results, either in a Bencher JSON format, or a
# standard command-line format.
if "bencher" in test_config:
    report = {
        "FlatGFA Benchmark Results": {
            "Average Execution Latency": {"value": round(latency_ms, 2)},
            "Average File Size": {"value": round(filesize_kb, 2)},
        }
    }
    json.dump(report, sys.stdout)
elif "latency" in test_config:
    # Only print latency info if flag set.
    print(f"Average Execution Latency: {round(latency_ms, 2)} ms")
elif "filesize" in test_config:
    # Only print filesize info if flag set.
    print(f"Average File Size: {round(filesize_kb, 2)} KB")
else:
    print(f"Average Execution Latency: {round(latency_ms, 2)} ms")
    print(f"Average File Size: {round(filesize_kb, 2)} KB")


# Command format:
# python bench/benchmark_web.py [size](_bencher/_latency/_filesize) [run_count] (del/_) (norm)
# () = optional, [] = replace with value
32 changes: 32 additions & 0 deletions bench/filesize_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import sys
import os
import json
import subprocess
import tomllib

def benchmark(test_file):
    """Convert ``test_file`` to FlatGFA and return the output size in bytes.

    Runs the ``fgfa`` CLI to write a temporary FlatGFA file, measures it,
    and removes the temporary file before returning.
    """
    out_name = "filesize_benchmark.txt"
    subprocess.run(["fgfa", "-I", test_file, "-o", out_name], check=True)
    try:
        return os.path.getsize(out_name)
    finally:
        # os.remove instead of shelling out to `rm -rf`; `finally` keeps the
        # cleanup even if getsize raises.
        os.remove(out_name)

# Benchmark each test graph; sizes are converted to kilobytes.
gfa_files = ["tests/chr6.C4.gfa", "tests/DRB1-3123.gfa", "tests/LPA.gfa"]
sizes = {name: float(benchmark(name)) / 1000.0 for name in gfa_files}

# Average over all three graphs. (Fixed: the original summed DRB1-3123
# twice and omitted LPA.)
size_bytes_avg = sum(sizes.values()) / len(sizes)

bencher_json = {
    "FlatGFA File Size": {
        "chr6.C4 (File Size)": {"value": round(sizes["tests/chr6.C4.gfa"], 2)},
        "DRB1-3123 (File Size)": {"value": round(sizes["tests/DRB1-3123.gfa"], 2)},
        # Fixed: this entry previously reported the DRB1-3123 size.
        "LPA (File Size)": {"value": round(sizes["tests/LPA.gfa"], 2)},
        "Average (File Size)": {"value": round(size_bytes_avg, 2)},
    }
}

json.dump(bencher_json, sys.stdout)




109 changes: 109 additions & 0 deletions bench/filesize_benchmark_web.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import sys
import os
import json
import subprocess
from pathlib import Path
import tomllib
import gzip
import shutil

# Parse the GFA URLs from graphs.toml
with open("bench/graphs.toml", "rb") as f:
    toml_graphs = tomllib.load(f)

# URL tables keyed by graph name, one per data source in graphs.toml.
hprc_dict = dict(toml_graphs["hprc"])
test_dict = dict(toml_graphs["test"])
gont_dict = dict(toml_graphs["1000gont"])

# Test-file sets, roughly ordered from smallest ("smoke") to largest ("big").
smoke_files = [test_dict["k"]]
mini_files = [test_dict["lpa"], test_dict["chr6c4"], hprc_dict["chrM"]]
med_files = [hprc_dict["chr20"], hprc_dict["chrX"], gont_dict["chr16"]]
big_files = [hprc_dict["chrY"], hprc_dict["chr1"], hprc_dict["chr10"]]

# Name of the temporary FlatGFA output file used for size measurement.
results = "filesize_benchmark.txt"

# Download a GFA file from the internet
def download_file(target_name, web_file):
    """Download a GFA file to ``target_name`` unless it already exists.

    URLs whose name contains "gfa.gz" are fetched to a temporary ``.gz``
    file, decompressed into ``target_name``, and the archive is removed.
    Plain URLs are downloaded directly with curl.
    """
    # Existing files are reused so repeated runs skip the download.
    if Path(target_name).exists():
        return

    if "gfa.gz" in web_file:
        temp_name = f"{target_name}.gz"
        subprocess.run(["curl", "-o", temp_name, web_file], check=True)
        with gzip.open(temp_name, "rb") as f_in, open(target_name, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        # os.remove instead of shelling out to `rm -rf` for a single file.
        os.remove(temp_name)
    else:
        subprocess.run(["curl", "-o", target_name, web_file], check=True)

# Run the file size benchmark across all files
def benchmark(test_config):
    """Run the file-size benchmark across one set of GFA files.

    Downloads each graph in the set selected by ``test_config``, converts
    it to FlatGFA with the ``fgfa`` CLI, and returns the average output
    size in kilobytes. ``sys.argv[2]`` may be "del" to delete each
    downloaded test file after it has been measured.
    """
    test_cond = ""
    if len(sys.argv) >= 3:
        test_cond = sys.argv[2]  # Can be "del", or not provided

    # Choose test file set
    if "smoke" in test_config:
        test_files = smoke_files
    elif "mini" in test_config:
        test_files = mini_files
    elif "med" in test_config:
        test_files = med_files
    elif "big" in test_config:
        test_files = big_files
    else:
        raise ValueError("Incorrect test config provided")

    size_bytes_total = 0

    # Convert each file in the set and accumulate the output sizes.
    for i, file in enumerate(test_files):
        test_file_name = f"tests/{test_config}_{i}.gfa"
        download_file(test_file_name, file)
        subprocess.run(["fgfa", "-I", test_file_name, "-o", results],
                       check=True)
        size_bytes_total += os.path.getsize(results)
        # os.remove instead of shelling out to `rm -rf` for a single file.
        os.remove(results)
        if test_cond == "del":
            os.remove(test_file_name)

    # Average size in kilobytes.
    return size_bytes_total / len(test_files) / 1000.0

# Read the desired test file set from command-line input.
if len(sys.argv) < 2:
    raise ValueError("No arguments provided")
test_config = sys.argv[1]  # "smoke", "mini", "med", or "big" (plus flags)


# Output the benchmark results, either in a Bencher JSON format, or a
# standard command-line format.
if "bencher" in test_config:
    report = {
        "FlatGFA File Size Average": {
            "Average File Size": {"value": round(benchmark(test_config), 2)},
        }
    }
    json.dump(report, sys.stdout)
else:
    print(f"File Size Average: {round(benchmark(test_config), 2)} KB")

# Command format: python bench/filesize_benchmark_web.py [size](_bencher) (del)
# () = optional, [] = replace with value
Loading
Loading