Skip to content

Commit a7e55a8

Browse files
winteredmpreinerzmylinxi99Tomaqa
authored
2025 webpage (#205)
* bitwuzla: Change options for parallel track. (#201) * bitwuzla: Change options for parallel track. Add additional configurations for lower CPU core counts. * Make additional configurations non-competitive. * submission: add additional configurations for lower CPU core counts. (#202) * SMTS submission 2025 update (bug fix + 2 more versions) (#203) * present derived solvers' increment over base solver * cosmetics * interim * minor * fixing CI issues * establishing consistency among submissions with multiple cores * 2024 -> 2025 * 2024 -> 2025 * target for cleaning up web results * feat: show legends for base and non-competing only when at least one solver present * feat: non-competitive divisions will be marked * skip non-competing solvers on podiums * fix: -- -> - * fix: missing case for determining competitive divisions * fix: competitive solvers * fix competitive divisions * consistency submission vs. base solver * refactor * fix: ordering of solvers * CI * Revert "Merge branch '2025_webpage' of github.com:SMT-COMP/smt-comp.github.io into 2025_webpage" This reverts commit c414881, reversing changes made to 061c204. * disable dead solver links --------- Co-authored-by: Mathias Preiner <mathias.preiner@gmail.com> Co-authored-by: Mengyu Zhao <zmylinxi99@gmail.com> Co-authored-by: Tomaqa <tomaqa@gmail.com>
1 parent 01c9928 commit a7e55a8

24 files changed

Lines changed: 289 additions & 160 deletions

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ build: clean-build ## Build wheel file using poetry
3131
clean-build: ## clean build artifacts
3232
@rm -rf dist
3333

34+
.PHONY: clean-web-results
35+
clean-web-results: ## clean web results
36+
@rm -rf web/content/results
37+
@rm -rf web/public/results
38+
39+
3440
.PHONY: help
3541
help:
3642
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

data/results-mv-2025.json.gz

-3.88 KB
Binary file not shown.

smtcomp/benchexec.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def get_suffix(track: defs.Track) -> str:
2828
return "_parallel"
2929
case defs.Track.Cloud:
3030
return "_cloud"
31+
case _:
32+
raise ValueError(f"Unhandled track: {track}")
3133

3234

3335
def get_xml_name(s: defs.Submission, track: defs.Track, division: defs.Division) -> str:
@@ -46,9 +48,8 @@ class CmdTask(BaseModel):
4648
taskdirs: List[str]
4749

4850

49-
def generate_benchmark_yml(
50-
ymlfile: Path, benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]
51-
) -> None:
51+
def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]) -> None:
52+
ymlfile = benchmark.with_suffix(".yml")
5253
with ymlfile.open("w") as f:
5354
f.write("format_version: '2.0'\n\n")
5455

smtcomp/defs.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def name_is_default_field(cls, data: Any) -> Any:
135135

136136
class SolverType(EnumAutoInt):
137137
wrapped = "wrapped"
138-
derived = "derived"
138+
derived = "derived" # TODO: put a datatype information on base solver
139139
standalone = "Standalone"
140140
portfolio = "Portfolio"
141141

@@ -1317,6 +1317,7 @@ class Submission(BaseModel, extra="forbid"):
13171317
website: HttpUrl
13181318
system_description: HttpUrl
13191319
solver_type: SolverType
1320+
# TODO add field base_solver?
13201321
participations: Participations
13211322
seed: int | None = None
13221323
competitive: bool = True
@@ -1325,6 +1326,7 @@ class Submission(BaseModel, extra="forbid"):
13251326
description="Must be set for the final version of the submission. An archive on zenodo is needed in this case.",
13261327
)
13271328

1329+
# TODO: model validator to check the sanity of the new base_solver field
13281330
@model_validator(mode="after")
13291331
def check_archive(self) -> Submission:
13301332
if self.archive is None and not all(p.archive for p in self.participations.root):
@@ -1493,6 +1495,11 @@ class Config:
14931495
"""
14941496
Commit of the model validator dolmen (branch smtcomp-2023)
14951497
"""
1498+
dolmen_force_logic_ALL = False
1499+
"""
1500+
Some benchmarks are not accepted by dolmen in their current logic.
1501+
During model validation we can rerun by forcing logic ALL to accept more models
1502+
"""
14961503

14971504
removed_benchmarks = [
14981505
{
@@ -1510,11 +1517,26 @@ class Config:
15101517
Benchmarks to remove before selection
15111518
"""
15121519

1513-
removed_results = []
1520+
removed_results: list[Any] = []
1521+
15141522
"""
15151523
Benchmarks to remove after running the solvers. Can be used when the selection has already been done.
15161524
"""
15171525

1526+
"""
1527+
Solver -> Base solver map for 2025
1528+
TODO: refactor this into Submission
1529+
"""
1530+
baseSolverMap2025 = {
1531+
"Bitwuzla-MachBV": "Bitwuzla-MachBV-base",
1532+
"Z3-Inc-Z3++": "Z3-Inc-Z3++-base",
1533+
"Z3-Noodler-Mocha": "Z3-Noodler-Mocha-base",
1534+
"Z3-Owl": "Z3-Owl-base",
1535+
"Z3-Noodler": "Z3-Noodler",
1536+
"z3siri": "z3siri-base",
1537+
"Z3-alpha": "Z3-alpha-base",
1538+
}
1539+
15181540
def __init__(self, data: Path | None) -> None:
15191541
self.id = self.__class__.__next_id__
15201542
self.__class__.__next_id__ += 1
@@ -1578,6 +1600,10 @@ def submissions(self) -> list[Submission]:
15781600
Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json")
15791601
]
15801602

1603+
@functools.cached_property
1604+
def competitive_solvers(self) -> list[str]:
1605+
return [s.name for s in self.submissions if s.competitive]
1606+
15811607
@functools.cached_property
15821608
def web_results(self) -> Path:
15831609
return self.data / ".." / "web" / "content" / "results"

smtcomp/generate_website_page.py

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ def page_track_suffix(track: defs.Track) -> str:
9494

9595
class PodiumStep(BaseModel):
9696
name: str
97+
baseSolver: str
98+
deltaBaseSolver: int
9799
competing: str # yes or no
98100
errorScore: int
99101
correctScore: int
@@ -115,6 +117,7 @@ class PodiumDivision(BaseModel):
115117
participants: str # participants_2023
116118
disagreements: str # disagreements_2023
117119
division: str # Arith
120+
is_competitive: bool # true = least 2 subst. different solvers were submitted
118121
track: track_name
119122
n_benchmarks: int
120123
time_limit: int
@@ -237,14 +240,27 @@ class Podium(RootModel):
237240
root: PodiumDivision | PodiumCrossDivision | PodiumSummaryResults = Field(..., discriminator="layout")
238241

239242

240-
def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
243+
def podium_steps(config: defs.Config, podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
241244
if podium is None:
242245
return []
243246
else:
244-
return [
245-
PodiumStep(
247+
podiums = []
248+
non_competitive = []
249+
for s in podium:
250+
cscore = s["correctly_solved_score"]
251+
delta = 0
252+
derived_solver = defs.Config.baseSolverMap2025.get(s["solver"], "")
253+
if derived_solver != "":
254+
for sprime in podium:
255+
if sprime["solver"] == defs.Config.baseSolverMap2025.get(s["solver"], ""):
256+
delta = cscore - sprime["correctly_solved_score"]
257+
break
258+
259+
ps = PodiumStep(
246260
name=s["solver"],
247-
competing="yes", # TODO
261+
baseSolver=derived_solver,
262+
deltaBaseSolver=delta,
263+
competing="yes" if s["solver"] in config.competitive_solvers else "no",
248264
errorScore=s["error_score"],
249265
correctScore=s["correctly_solved_score"],
250266
CPUScore=s["cpu_time_score"],
@@ -257,36 +273,70 @@ def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
257273
timeout=s["timeout"],
258274
memout=s["memout"],
259275
)
260-
for s in podium
261-
]
276+
277+
if not s["solver"] in config.competitive_solvers:
278+
non_competitive.append(ps)
279+
else:
280+
podiums.append(ps)
281+
282+
return podiums + non_competitive
262283

263284

264-
def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track) -> PodiumDivision:
285+
def make_podium(
286+
config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track, results: pl.LazyFrame
287+
) -> PodiumDivision:
265288
def get_winner(l: List[dict[str, str]] | None) -> str:
266-
# TODO select only participating
289+
if l is None or not l:
290+
return "-"
291+
292+
l = [e for e in l if e["solver"] in config.competitive_solvers]
293+
267294
if l is None or not l or l[0]["correctly_solved_score"] == 0:
268295
return "-"
269296
else:
270297
return l[0]["solver"]
271298

299+
def is_competitive_division(results: pl.LazyFrame, division: int, for_division: bool) -> bool:
300+
"""
301+
A division in a track is competitive if at least two substantially different
302+
solvers (i.e., solvers from two different teams) were submitted.
303+
"""
304+
305+
solvers = (
306+
results.filter(pl.col("division" if for_division else "logic") == division)
307+
.select("solver")
308+
.unique()
309+
.collect()
310+
.get_column("solver")
311+
.to_list()
312+
)
313+
314+
# Avoid solvers of the same solver family under the assumption
315+
# of the following format: <solver-family>-<suffix> (holds for SMT-COMP 2025)
316+
# TODO: improve this criterion in the future
317+
return len(set([sol.split("-")[0].lower() for sol in solvers])) >= 2
318+
272319
if for_division:
320+
competitive_division = is_competitive_division(results, d["division"], for_division)
273321
division = defs.Division.name_of_int(d["division"])
274322
logics = dict((defs.Logic.name_of_int(d2["logic"]), d2["n"]) for d2 in d["logics"])
275323
else:
276324
division = defs.Logic.name_of_int(d["logic"])
325+
competitive_division = is_competitive_division(results, d["logic"], for_division)
277326
logics = dict()
278327

279328
if (track == defs.Track.Cloud) | (track == defs.Track.Parallel):
280329
winner_seq = "-"
281330
steps_seq = []
282331
else:
283332
winner_seq = get_winner(d[smtcomp.scoring.Kind.seq.name])
284-
steps_seq = podium_steps(d[smtcomp.scoring.Kind.seq.name])
333+
steps_seq = podium_steps(config, d[smtcomp.scoring.Kind.seq.name])
285334

286335
return PodiumDivision(
287-
resultdate="2024-07-08",
336+
resultdate="2025-08-11",
288337
year=config.current_year,
289338
divisions=f"divisions_{config.current_year}",
339+
is_competitive=competitive_division,
290340
participants=f"participants_{config.current_year}",
291341
disagreements=f"disagreements_{config.current_year}",
292342
division=division,
@@ -301,15 +351,15 @@ def get_winner(l: List[dict[str, str]] | None) -> str:
301351
winner_unsat=get_winner(d[smtcomp.scoring.Kind.unsat.name]),
302352
winner_24s=get_winner(d[smtcomp.scoring.Kind.twentyfour.name]),
303353
sequential=steps_seq,
304-
parallel=podium_steps(d[smtcomp.scoring.Kind.par.name]),
305-
sat=podium_steps(d[smtcomp.scoring.Kind.sat.name]),
306-
unsat=podium_steps(d[smtcomp.scoring.Kind.unsat.name]),
307-
twentyfour=podium_steps(d[smtcomp.scoring.Kind.twentyfour.name]),
354+
parallel=podium_steps(config, d[smtcomp.scoring.Kind.par.name]),
355+
sat=podium_steps(config, d[smtcomp.scoring.Kind.sat.name]),
356+
unsat=podium_steps(config, d[smtcomp.scoring.Kind.unsat.name]),
357+
twentyfour=podium_steps(config, d[smtcomp.scoring.Kind.twentyfour.name]),
308358
)
309359

310360

311361
def sq_generate_datas(
312-
config: defs.Config, results: pl.LazyFrame, for_division: bool, track: defs.Track
362+
config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, for_division: bool, track: defs.Track
313363
) -> dict[str, PodiumDivision]:
314364
"""
315365
Generate datas for divisions or for logics
@@ -322,12 +372,13 @@ def sq_generate_datas(
322372
group_by = "logic"
323373
name_of_int = defs.Logic.name_of_int
324374

375+
selection = selection.filter(selected=True)
376+
325377
# TODO it should be done after filter_for
326-
len_by_division = results.group_by(group_by).agg(total=pl.col("file").n_unique())
378+
len_by_division = selection.group_by(group_by).agg(total=pl.len())
327379

328380
def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, results: pl.LazyFrame) -> pl.LazyFrame:
329381
results = smtcomp.scoring.filter_for(kind, config, results)
330-
331382
return (
332383
sort(
333384
intersect(results, len_by_division, on=[group_by])
@@ -368,8 +419,8 @@ def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, result
368419

369420
if for_division:
370421
lf_logics = [
371-
results.group_by("division", "logic")
372-
.agg(n=pl.col("file").n_unique())
422+
selection.group_by("division", "logic")
423+
.agg(n=pl.len())
373424
.group_by("division")
374425
.agg(logics=pl.struct("logic", "n"))
375426
]
@@ -382,7 +433,7 @@ def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, result
382433

383434
df = r.collect()
384435

385-
return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track)) for d in df.to_dicts())
436+
return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track, results)) for d in df.to_dicts())
386437

387438

388439
def get_kind(a: PodiumDivision, k: smtcomp.scoring.Kind) -> list[PodiumStep]:
@@ -471,7 +522,7 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str:
471522
winner_seq = get_winner(sequential)
472523

473524
return PodiumBiggestLead(
474-
resultdate="2024-07-08",
525+
resultdate="2025-08-11",
475526
year=config.current_year,
476527
track=track,
477528
results=f"results_{config.current_year}",
@@ -589,7 +640,7 @@ def get_winner(
589640
winner_seq = get_winner(sequential, scores, data, track)
590641

591642
return PodiumBestOverall(
592-
resultdate="2024-07-08",
643+
resultdate="2025-08-11",
593644
year=config.current_year,
594645
track=track,
595646
results=f"results_{config.current_year}",
@@ -684,7 +735,7 @@ def timeScore(vws_step: PodiumStep) -> float:
684735
steps_seq = ld[smtcomp.scoring.Kind.seq]
685736

686737
return PodiumLargestContribution(
687-
resultdate="2024-07-08",
738+
resultdate="2025-08-11",
688739
year=config.current_year,
689740
track=track,
690741
results=f"results_{config.current_year}",
@@ -702,7 +753,9 @@ def timeScore(vws_step: PodiumStep) -> float:
702753
)
703754

704755

705-
def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.Track) -> PodiumLargestContribution:
756+
def largest_contribution(
757+
config: defs.Config, selection: pl.LazyFrame, scores: pl.LazyFrame, track: defs.Track
758+
) -> PodiumLargestContribution:
706759
for_division = True
707760
# For each solver compute its corresponding best solver
708761
# TODO: check what is competitive solver (unsound?)
@@ -727,11 +780,10 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
727780
pl.min("cpu_time_score"),
728781
sound_status=pl.col("sound_status").first(),
729782
answer=pl.col("answer").first(),
730-
logic=-1,
731783
)
732784
.with_columns(solver=pl.lit("virtual"), error_score=0)
733785
)
734-
virtual_datas = sq_generate_datas(config, virtual_scores, for_division, track)
786+
virtual_datas = sq_generate_datas(config, selection, virtual_scores, for_division, track)
735787

736788
# For each solver Compute virtual solver without the solver
737789
solvers = scores.select("division", "solver").unique()
@@ -749,10 +801,11 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
749801
sound_status=pl.col("sound_status").first(),
750802
error_score=0,
751803
answer=pl.col("answer").first(),
752-
logic=-1,
753804
)
754805
)
755-
virtual_without_solver_datas = sq_generate_datas(config, virtual_without_solver_scores, for_division, track)
806+
virtual_without_solver_datas = sq_generate_datas(
807+
config, selection, virtual_without_solver_scores, for_division, track
808+
)
756809

757810
large = largest_contribution_ranking(config, virtual_datas, virtual_without_solver_datas, ratio_by_division, track)
758811

@@ -764,8 +817,7 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
764817
return large
765818

766819

767-
def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track) -> None:
768-
820+
def export_results(config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, track: defs.Track) -> None:
769821
page_suffix = page_track_suffix(track)
770822

771823
dst = config.web_results
@@ -780,7 +832,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track
780832
all_divisions: list[PodiumDivision] = []
781833

782834
for for_division in [True, False]:
783-
datas = sq_generate_datas(config, scores, for_division, track)
835+
datas = sq_generate_datas(config, selection, scores, for_division, track)
784836

785837
for name, data in datas.items():
786838
(dst / f"{name.lower()}-{page_suffix}.md").write_text(data.model_dump_json(indent=1))
@@ -795,7 +847,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track
795847
bigdata = biggest_lead_ranking(config, datas, track)
796848
(dst / f"biggest-lead-{page_suffix}.md").write_text(bigdata.model_dump_json(indent=1))
797849

798-
largedata = largest_contribution(config, scores, track)
850+
largedata = largest_contribution(config, selection, scores, track)
799851
(dst / f"largest-contribution-{page_suffix}.md").write_text(largedata.model_dump_json(indent=1))
800852

801853
all_divisions.sort(key=lambda x: x.division)

0 commit comments

Comments
 (0)