|
210 | 210 | color: white; |
211 | 211 | border-color: #0066cc; |
212 | 212 | } |
/* Centre spanning headers, sortable headers and data cells in one rule;
   the model-name column is then switched back to left alignment. */
.leaderboard-table th[colspan],
.leaderboard-table th[data-column],
.leaderboard-table td {
    text-align: center;
}
.leaderboard-table td.model-name {
    text-align: left;
}
213 | 225 |
|
214 | 226 | /* Media queries for responsive design */ |
215 | 227 | @media screen and (max-width: 768px) { |
@@ -376,6 +388,30 @@ <h2>📊 Leaderboard (<span id="current-temp">T=0</span>)</h2> |
376 | 388 | * GPT-o4-mini:high was measured under T=1 since o-series models only support T=1. |
377 | 389 | </p> |
378 | 390 |
|
| 391 | + <h2>📊 solve<sub>s</sub>@i Metrics (T=0)</h2> |
| 392 | + <table class="leaderboard-table" id="solve-metrics"> |
| 393 | + <thead> |
| 394 | + <tr> |
| 395 | + <th rowspan="2" data-column="Model">Model</th> |
| 396 | + <th colspan="3">Stage 3 (Verification)</th> |
| 397 | + <th colspan="3">Stage 2 (Solution Gen.)</th> |
| 398 | + <th colspan="3">Stage 1 (Execution)</th> |
| 399 | + </tr> |
| 400 | + <tr> |
| 401 | + <th data-column="solve@10 stage3" class="sort-desc">@10</th> |
| 402 | + <th data-column="solve@5 stage3">@5</th> |
| 403 | + <th data-column="solve@1 stage3">@1</th> |
| 404 | + <th data-column="solve@10 stage2">@10</th> |
| 405 | + <th data-column="solve@5 stage2">@5</th> |
| 406 | + <th data-column="solve@1 stage2">@1</th> |
| 407 | + <th data-column="solve@10 stage1">@10</th> |
| 408 | + <th data-column="solve@5 stage1">@5</th> |
| 409 | + <th data-column="solve@1 stage1">@1</th> |
| 410 | + </tr> |
| 411 | + </thead> |
| 412 | + <tbody></tbody> |
| 413 | + </table> |
| 414 | + |
379 | 415 | <h2>📚 Citation</h2> |
380 | 416 | <div class="citation-box">@article{chen2025heurigym, |
381 | 417 | title={HeuriGym: An Agentic Benchmark for LLM-Crafted Heuristics in Combinatorial Optimization}, |
@@ -644,8 +680,77 @@ <h2>📚 Citation</h2> |
// Load the model metadata before rendering anything: loadSolveMetrics() looks
// up each model's URL in the global `metadata` array, so it must be populated
// first. A rejection handler is attached so a failed fetch (or an error thrown
// while kicking off the renders) is reported instead of becoming an unhandled
// promise rejection.
d3.csv("results/model_metadata.csv").then(data => {
    metadata = data;
    loadData(currentTemperature);
    loadSolveMetrics();
}).catch(error => {
    console.error("Failed to load model metadata or start rendering the leaderboard:", error);
});
648 | 685 |
|
/**
 * Fetch `results/solve_metrics_summary.csv` and render it into the
 * `#solve-metrics` table, then wire its `th[data-column]` headers for sorting.
 *
 * Rows are initially ordered by "solve@10 stage3", highest first. Clicking a
 * header sorts by that column: descending when the header currently carries
 * `sort-asc`, ascending otherwise. Metric cells are shown as percentages with
 * two decimals; blank or non-numeric CSV cells are shown as "N/A" and always
 * sort to the bottom regardless of direction.
 *
 * Reads the global `metadata` array (entries with `Model` and `URL`) to link
 * each model name, and publishes the loaded rows on `window.solveMetricsData`.
 *
 * @returns {Promise<void>} Settles once the table has been rendered. Load
 *     failures are logged to the console and not rethrown.
 */
function loadSolveMetrics() {
    // CSV columns rendered as percentage cells, in the same left-to-right
    // order as the second header row of the table.
    const metricColumns = [
        "solve@10 stage3", "solve@5 stage3", "solve@1 stage3",
        "solve@10 stage2", "solve@5 stage2", "solve@1 stage2",
        "solve@10 stage1", "solve@5 stage1", "solve@1 stage1"
    ];

    // Parse a raw CSV cell. Blank cells must not be coerced to 0 (unary plus
    // and multiplication both turn "" into 0), so anything that is not a
    // finite number is normalised to NaN.
    const toNumber = (raw) => {
        const text = raw == null ? "" : String(raw).trim();
        if (text === "") {
            return NaN;
        }
        const value = Number(text);
        return Number.isFinite(value) ? value : NaN;
    };

    // Format a fraction such as "0.1234" as "12.34%".
    const formatPercent = (raw) => {
        const value = toNumber(raw);
        return Number.isNaN(value) ? "N/A" : `${(value * 100).toFixed(2)}%`;
    };

    // Comparator for a numeric column; rows without a usable value go last.
    const compareMetric = (column, descending) => (a, b) => {
        const left = toNumber(a[column]);
        const right = toNumber(b[column]);
        const leftMissing = Number.isNaN(left);
        const rightMissing = Number.isNaN(right);
        if (leftMissing || rightMissing) {
            return Number(leftMissing) - Number(rightMissing);
        }
        return descending ? d3.descending(left, right) : d3.ascending(left, right);
    };

    // Comparator for the model-name column; tolerates rows without a name.
    const compareModel = (descending) => (a, b) => {
        const left = String(a.Model ?? "");
        const right = String(b.Model ?? "");
        return descending ? right.localeCompare(left) : left.localeCompare(right);
    };

    return d3.csv("results/solve_metrics_summary.csv").then(data => {
        const tbody = d3.select("#solve-metrics tbody");

        // Index model URLs once instead of scanning `metadata` for every row.
        // The first entry per model wins, matching Array.prototype.find.
        const urlByModel = new Map();
        const knownModels = typeof metadata !== "undefined" && Array.isArray(metadata) ? metadata : [];
        knownModels.forEach(entry => {
            if (!urlByModel.has(entry.Model)) {
                urlByModel.set(entry.Model, entry.URL);
            }
        });

        // Replace the table body with one row per entry of `rows`.
        const renderRows = (rows) => {
            tbody.selectAll("tr").remove();

            rows.forEach(entry => {
                const row = tbody.append("tr");
                row.append("td").attr("class", "model-name")
                    .append("a")
                    // `||` on purpose: an empty URL should also fall back to "#".
                    .attr("href", urlByModel.get(entry.Model) || "#")
                    .attr("target", "_blank")
                    // Keep the opened page from reaching back via window.opener.
                    .attr("rel", "noopener noreferrer")
                    .text(entry.Model);

                metricColumns.forEach(column => {
                    row.append("td").text(formatPercent(entry[column]));
                });
            });
        };

        // Default order: stage 3 solve@10, best first.
        data.sort(compareMetric("solve@10 stage3", true));

        // Still published globally for any code that reads it; the click
        // handler below uses the closure variable directly.
        window.solveMetricsData = data;

        renderRows(data);

        d3.selectAll("#solve-metrics th[data-column]").on("click", function () {
            const column = this.getAttribute("data-column");
            // A header currently sorted ascending flips to descending;
            // every other state (including unsorted) becomes ascending.
            const sortDescending = this.classList.contains("sort-asc");
            const comparator = column === "Model"
                ? compareModel(sortDescending)
                : compareMetric(column, sortDescending);

            // Sort a copy so the default order stored above is not disturbed.
            const sortedRows = [...data].sort(comparator);

            // Only the clicked header keeps a sort indicator.
            d3.selectAll("#solve-metrics th")
                .classed("sort-asc", false)
                .classed("sort-desc", false);
            d3.select(this)
                .classed(sortDescending ? "sort-desc" : "sort-asc", true);

            renderRows(sortedRows);
        });
    }).catch(error => {
        console.error("Failed to load solve metrics:", error);
    });
}
| 753 | + |
649 | 754 | function copyCitation() { |
650 | 755 | const citationText = `@article{chen2025heurigym, |
651 | 756 | title={HeuriGym: An Agentic Benchmark for LLM-Crafted Heuristics in Combinatorial Optimization}, |
|
0 commit comments