Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ nwb_benchmarks = "nwb_benchmarks.command_line_interface:main"
app = ["flask", "flask-cors", "flask_restx"]
database = ["polars"]
dev = ["ipython", "pre-commit"]
figures = ["seaborn", "matplotlib", "pyarrow"]
all = [
{include-group = "app"},
{include-group = "database"},
{include-group = "dev"},
{include-group = "figures"}
]


Expand Down
246 changes: 3 additions & 243 deletions src/nwb_benchmarks/_flask/_app.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import dataclasses
import json
import os
import pathlib
import subprocess
import time
import traceback
import typing
import uuid
from datetime import datetime
from pathlib import Path

import flask
import flask_restx
import packaging.version
import typing_extensions

from nwb_benchmarks.database._processing import repackage_as_parquet

app = flask.Flask(__name__)
api = flask_restx.Api(
Expand Down Expand Up @@ -99,7 +96,7 @@ def post(self) -> int:

time.sleep(1)

repackage_as_parquet(directory=directory, output_directory=output_directory)
repackage_as_parquet(directory=directory, output_directory=output_directory, minimum_version="3.0.0")

time.sleep(1)

Expand Down Expand Up @@ -208,243 +205,6 @@ def push(self):
raise RuntimeError(message)


@dataclasses.dataclass
class Machine:
    """Metadata describing one benchmark machine, parsed from a ``machine-<id>.json`` file."""

    # Stem of the source file with the "machine-" prefix removed.
    id: str
    name: str
    # Schema version of the machine file; files older than 1.1.0 are rejected.
    version: str
    os: dict
    sys: dict
    platform: dict
    psutil: dict
    cuda: dict
    asv: dict

    @classmethod
    def safe_load_from_json(cls, file_path: pathlib.Path) -> "typing_extensions.Self | None":
        """Load a machine record from JSON.

        Parameters
        ----------
        file_path : pathlib.Path
            Path to a ``machine-<id>.json`` file.

        Returns
        -------
        Machine or None
            ``None`` when the file has no ``version`` field or its schema version
            predates 1.1.0.
        """
        with file_path.open(mode="r") as file_stream:
            data = json.load(file_stream)

        # Bug fix: the original called `str(...)` on the raw lookup, turning a missing
        # version into the literal string "None"; the `is None` guard below then never
        # fired and `packaging.version.Version("None")` raised instead of returning None.
        raw_version = data.get("version", None)
        if raw_version is None:
            return None
        version = str(raw_version)
        if packaging.version.Version(version) < packaging.version.Version(version="1.1.0"):
            return None

        machine_id = file_path.stem.removeprefix("machine-")

        return cls(
            id=machine_id,
            name=data.get("name", ""),
            version=version,
            os=data.get("os", {}),
            sys=data.get("sys", {}),
            platform=data.get("platform", {}),
            psutil=data.get("psutil", {}),
            cuda=data.get("cuda", {}),
            asv=data.get("asv", {}),
        )

    def to_dataframe(self) -> "polars.DataFrame":
        """Flatten this record into a single-row polars DataFrame; nested dicts are JSON-encoded.

        NOTE(review): the `id` field is not written to the table — presumably the
        parquet rows are keyed elsewhere; confirm this is intentional.
        """
        import polars  # deferred import: polars is an optional dependency

        data = {
            "name": self.name,
            "version": self.version,
            "os": json.dumps(self.os),
            "sys": json.dumps(self.sys),
            "platform": json.dumps(self.platform),
            "psutil": json.dumps(self.psutil),
            "cuda": json.dumps(self.cuda),
            "asv": json.dumps(self.asv),
        }

        data_frame = polars.DataFrame(data=data)
        return data_frame


@dataclasses.dataclass
class Environment:
    """One benchmark software environment, parsed from an ``environment-<id>.json`` file.

    Package names become dynamic attributes whose values are
    ``"<version> (<build>)"`` strings.
    """

    environment_id: str
    preamble: str

    # Hand-written so arbitrary package fields can be attached; `dataclass` does not
    # overwrite an explicitly defined __init__.
    def __init__(self, environment_id: str, preamble: str, **kwargs) -> None:
        self.environment_id = environment_id
        self.preamble = preamble
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def safe_load_from_json(cls, file_path: pathlib.Path) -> "typing_extensions.Self | None":
        """Load an environment record from JSON.

        Returns ``None`` for files that do not contain exactly one top-level key or
        that list no fully-specified (name/version/build) packages.
        """
        with file_path.open(mode="r") as file_stream:
            data = json.load(fp=file_stream)

        # Bug fix: the original only rejected `len(data) > 1`, so an empty JSON object
        # fell through and `next(iter(...))` below raised StopIteration. Require
        # exactly one top-level key.
        if len(data) != 1:
            return None

        environment_id = file_path.stem.removeprefix("environment-")
        preamble = next(iter(data.keys()))

        # Keep only packages that specify all three of name, version, and build.
        packages = {
            package["name"]: f'{package["version"]} ({package["build"]})'
            for package in data[preamble]
            if len(package) == 3
        }

        if not packages:
            return None

        return cls(environment_id=environment_id, preamble=preamble, **packages)

    def to_dataframe(self) -> "polars.DataFrame":
        """Flatten this record into a single-row polars DataFrame (one column per package)."""
        import polars  # deferred import: polars is an optional dependency

        data = {
            "environment_id": self.environment_id,
            "preamble": self.preamble,
        }
        for package_name, package_details in self.__dict__.items():
            if package_name not in ("environment_id", "preamble"):
                data[package_name] = package_details

        data_frame = polars.DataFrame(data=data, orient="col")
        return data_frame


@dataclasses.dataclass
class Result:
    """A single flattened benchmark measurement: one sampled value of one variable."""

    # Row identifier generated at load time; TODO in loader notes it is not yet persisted.
    uuid: str
    # `database_version` of the originating results file.
    version: str
    timestamp: str
    # Git commit of the benchmarked code.
    commit_hash: str
    # Identifiers linking back to the environments and machines records.
    environment_id: str
    machine_id: str
    benchmark_name: str
    # Stringified parameter combination for parameterized benchmarks.
    parameter_case: str
    value: float
    # Name of the measured quantity (e.g. "time"; presumably also network metrics — confirm).
    variable: str


@dataclasses.dataclass
class Results:
    """Collection of flattened benchmark measurements parsed from one results JSON file."""

    # Quoted forward reference: `Result` is defined alongside this class.
    results: "list[Result]"

    @classmethod
    def safe_load_from_json(cls, file_path: pathlib.Path) -> "typing_extensions.Self | None":
        """Load and flatten a results file.

        Returns ``None`` when the file has no ``database_version`` or its version
        predates 1.0.0. Otherwise each (benchmark, parameter case, variable, sample)
        combination becomes one `Result` row.
        """
        with file_path.open(mode="r") as file_stream:
            data = json.load(fp=file_stream)

        database_version = data.get("database_version", None)
        # Consistency fix: reuse the bound local instead of re-indexing data["database_version"].
        if database_version is None or packaging.version.Version(database_version) < packaging.version.Version(
            version="1.0.0"
        ):
            return None

        timestamp = data["timestamp"]
        commit_hash = data["commit_hash"]
        environment_id = data["environment_id"]
        machine_id = data["machine_id"]

        def normalize_time_and_network_results(benchmark_results) -> dict:
            """Convert benchmark results to a consistent dict format with list values."""
            if isinstance(benchmark_results, dict):
                value_dict = benchmark_results
            else:
                # Bare (non-dict) results are treated as timing values.
                value_dict = dict(time=benchmark_results)

            # Ensure all values are lists.
            return {k: v if isinstance(v, list) else [float(v)] for k, v in value_dict.items()}

        results = [
            Result(
                uuid=str(uuid.uuid4()),  # TODO: add this to each results file so it is persistent
                version=database_version,
                timestamp=timestamp,
                commit_hash=commit_hash,
                environment_id=environment_id,
                machine_id=machine_id,
                benchmark_name=benchmark_name,
                parameter_case=parameter_case,
                value=value,
                variable=variable_name,
            )
            for benchmark_name, parameter_cases in data["results"].items()
            for parameter_case, benchmark_results in parameter_cases.items()
            for variable_name, values in normalize_time_and_network_results(benchmark_results).items()
            for value in values
        ]

        return cls(results=results)

    def to_dataframe(self) -> "polars.DataFrame":
        """Stack all rows into a polars DataFrame, one column per `Result` field."""
        import polars  # deferred import: polars is an optional dependency

        # Consistency fix: include `timestamp`, which `Result` carries but the
        # original omitted from the exported table.
        data = {
            "uuid": [result.uuid for result in self.results],
            "version": [result.version for result in self.results],
            "timestamp": [result.timestamp for result in self.results],
            "commit_hash": [result.commit_hash for result in self.results],
            "environment_id": [result.environment_id for result in self.results],
            "machine_id": [result.machine_id for result in self.results],
            "benchmark_name": [result.benchmark_name for result in self.results],
            "parameter_case": [result.parameter_case for result in self.results],
            "value": [result.value for result in self.results],
            "variable": [result.variable for result in self.results],
        }

        data_frame = polars.DataFrame(data=data)
        return data_frame


def repackage_as_parquet(directory: pathlib.Path, output_directory: pathlib.Path) -> None:
    """Repackage per-file JSON records under `directory` into parquet tables.

    Reads the `machines/`, `environments/`, and `results/` subdirectories of
    `directory` and writes `machines.parquet`, `environments.parquet`, and
    `results.parquet` into `output_directory`. Files rejected by a model's
    `safe_load_from_json` screening are skipped.

    Parameters
    ----------
    directory : pathlib.Path
        Root directory containing the `machines`, `environments`, and `results`
        subdirectories of JSON files.
    output_directory : pathlib.Path
        Directory into which the parquet files are written.
    """
    import polars  # deferred import: polars is an optional dependency

    def _write_table(subdirectory_name: str, model, how: str, output_file_name: str) -> None:
        """Load every valid JSON record of one model and write them as a single parquet table."""
        data_frames = []
        for file_path in (directory / subdirectory_name).iterdir():
            record = model.safe_load_from_json(file_path=file_path)
            if record is None:
                continue
            data_frames.append(record.to_dataframe())

        # Robustness fix: `polars.concat` raises on an empty list; if no file in the
        # subdirectory was loadable, skip writing this table instead of crashing.
        if not data_frames:
            return

        database = polars.concat(items=data_frames, how=how)
        database.write_parquet(file=output_directory / output_file_name)

    # NOTE(review): machines used "diagonal_relaxed" while the other tables used
    # "diagonal" — presumably intentional (machine schemas vary more across hosts);
    # confirm before unifying.
    _write_table("machines", Machine, "diagonal_relaxed", "machines.parquet")
    _write_table("environments", Environment, "diagonal", "environments.parquet")
    _write_table("results", Results, "diagonal", "results.parquet")


if __name__ == "__main__":
DEBUG_MODE = os.environ.get("NWB_BENCHMARKS_DEBUG", None)
if DEBUG_MODE is not None and DEBUG_MODE != "1":
Expand Down
16 changes: 16 additions & 0 deletions src/nwb_benchmarks/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Exposed imports to the `database` submodule."""

from ._models import Environment, Machine, Result, Results
from ._processing import (
concat_dataclasses_to_parquet,
repackage_as_parquet,
)

__all__ = [
"Machine",
"Result",
"Results",
"Environment",
"concat_dataclasses_to_parquet",
"repackage_as_parquet",
]
Loading
Loading