Skip to content

Commit f5470cd

Browse files
committed
Merge branch 'development' of github.com:automl/CARP-S into development
2 parents ee407d1 + 971789c commit f5470cd

6 files changed

Lines changed: 94 additions & 28 deletions

File tree

carps/analysis/gather_data.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from carps.analysis.calc_hypervolume import add_hypervolume_to_df
2323
from carps.analysis.utils import convert_mixed_types_to_str, get_ids_mo
24+
from carps.utils.index_configs import get_index_config
2425
from carps.utils.loggingutils import get_logger, setup_logging
2526
from carps.utils.task import Task
2627
from carps.utils.trials import TrialInfo
@@ -404,9 +405,7 @@ def maybe_postadd_task(logs: pd.DataFrame, overwrite: bool = False) -> pd.DataFr
404405
logger.debug("No task_id in logs. Can't add task info.")
405406
return logs
406407
index_fn = Path(__file__).parent.parent / "configs/task/index.csv"
407-
if not index_fn.is_file():
408-
raise ValueError("ObjectiveFunction ids have not been indexed. Run `python -m carps.utils.index_configs`.")
409-
task_index = pd.read_csv(index_fn)
408+
task_index = get_index_config(index_fn)
410409

411410
new_logs = []
412411
for gid, gdf in logs.groupby(by=["task_id", "seed"]):

carps/analysis/process_data.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010
from omegaconf import DictConfig, ListConfig, OmegaConf
1111

12+
from carps.utils.index_configs import get_index_config
1213
from carps.utils.loggingutils import get_logger, setup_logging
1314

1415
setup_logging()
@@ -69,9 +70,7 @@ def maybe_postadd_task(logs: pd.DataFrame) -> pd.DataFrame:
6970
Logs with task columns.
7071
"""
7172
index_fn = Path(__file__).parent.parent / "configs/task/index.csv"
72-
if not index_fn.is_file():
73-
raise ValueError("Task ids have not been indexed. Run `python -m carps.utils.index_configs`.")
74-
task_index = pd.read_csv(index_fn)
73+
task_index = get_index_config(index_fn)
7574

7675
def load_task_cfg(task_id: str) -> DictConfig:
7776
config_fn = task_index["config_fn"][task_index["task_id"] == task_id].iloc[0]

carps/utils/index_configs.py

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,29 @@
22

33
from __future__ import annotations
44

5+
import hashlib
56
from pathlib import Path
67

78
import fire
89
import pandas as pd
910
from omegaconf import OmegaConf
11+
from platformdirs import user_cache_dir
1012
from rich.progress import track
1113

1214
from carps.utils.loggingutils import get_logger
1315

1416
logger = get_logger("ConfigIndexer")
1517

18+
1619
config_folder = Path(__file__).parent.parent / "configs"
1720
config_folder_task = config_folder / "task"
1821
config_folder_optimizer = config_folder / "optimizer"
1922

23+
PATH_KEY_ZIP = {
24+
config_folder_task: "task_id",
25+
config_folder_optimizer: "optimizer_id",
26+
}
27+
2028

2129
def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_paths: list[str] | None = None) -> None:
2230
"""Index all task and optimizer configs.
@@ -31,22 +39,13 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
3139
extra_optimizer_paths : list[str], optional
3240
Extra paths to custom optimizers, must be a folder containing only optimizer configs.
3341
"""
34-
config_folder_tasks = [config_folder_task] if extra_task_paths is None else [config_folder_task, *extra_task_paths] # type: ignore[list-item]
35-
config_folder_tasks = [Path(p) for p in config_folder_tasks]
36-
config_folder_optimizers = (
37-
[config_folder_optimizer]
38-
if extra_optimizer_paths is None
39-
else [config_folder_optimizer, *extra_optimizer_paths] # type: ignore[list-item]
40-
)
41-
config_folder_optimizers = [Path(p) for p in config_folder_optimizers]
42-
for key, paths in zip(["task_id", "optimizer_id"], [config_folder_tasks, config_folder_optimizers], strict=False):
43-
logger.info(f"Search configs for {key} from {paths}...")
44-
filenames = []
45-
for path in paths:
46-
filenames.extend(list(path.glob("**/*.yaml")))
42+
register_extra_paths(extra_task_paths, extra_optimizer_paths)
43+
44+
for path, key in PATH_KEY_ZIP.items():
45+
paths = list(path.glob("**/*.yaml"))
4746

4847
table_list = []
49-
for fn in track(filenames, total=len(filenames), description=f"Gathering for {key}..."):
48+
for fn in track(paths, total=len(paths), description=f"Gathering for {key}..."):
5049
cfg = OmegaConf.load(fn)
5150
value = cfg.get(key)
5251
table_list.append(
@@ -59,5 +58,78 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
5958
table.to_csv(paths[0] / "index.csv", index=False)
6059

6160

61+
def create_table(key: str, paths: list[Path], target: Path) -> None:
    """Write an index CSV that maps every config file to its identifier.

    Parameters
    ----------
    key : str
        Name of the identifier field read from each config (e.g. `task_id` or
        `optimizer_id`). It is also used as the name of the second CSV column.
    paths : list[Path]
        Config files to load with `OmegaConf.load`.
    target : Path
        Destination of the CSV file. The parent directory must already exist.
    """
    rows = [{"config_fn": str(config_fn), key: OmegaConf.load(config_fn).get(key)} for config_fn in paths]
    # Pin the columns explicitly: without them an empty `paths` list would produce
    # a CSV without the `config_fn`/`key` header, and every later lookup on the
    # loaded index would fail on the missing columns.
    table = pd.DataFrame(rows, columns=["config_fn", key])
    table.to_csv(target, index=False)
75+
76+
77+
def hash_inputs(paths: list[Path]) -> str:
    """Hash the given files so that an index built from them can be cached.

    The digest covers, for every file in sorted order, its path followed by its
    content. The path is part of the digest because the cached index stores the
    file names (`config_fn`): a renamed or moved config must invalidate the
    cache even though no file content changed.

    Parameters
    ----------
    paths : list[Path]
        Files to hash. The order of the list does not influence the result.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest.
    """
    chunk_size = 16 * 1024 * 1024
    hasher = hashlib.sha256()
    for path in sorted(paths):
        # NUL cannot occur in a file name, so it cleanly separates name and content.
        hasher.update(str(path).encode("utf-8", "surrogateescape"))
        hasher.update(b"\0")
        with path.open("rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        hasher.update(b"\0")
    return hasher.hexdigest()
85+
86+
87+
def register_extra_paths(extra_task_paths: list[str] | None, extra_optimizer_paths: list[str] | None) -> None:
    """Register extra task and optimizer config folders in `PATH_KEY_ZIP`.

    Parameters
    ----------
    extra_task_paths : list[str] | None
        Extra paths to custom tasks, must be a folder containing only task configs.
        A single path given as a plain string is accepted as well.
    extra_optimizer_paths : list[str] | None
        Extra paths to custom optimizers, must be a folder containing only optimizer configs.
        A single path given as a plain string is accepted as well.
    """
    # Optimizers are registered before tasks, so a folder passed in both
    # arguments ends up mapped to "task_id".
    for extra_paths, key in ((extra_optimizer_paths, "optimizer_id"), (extra_task_paths, "task_id")):
        if not extra_paths:
            continue
        # A single path (e.g. one value passed on the command line) arrives as a
        # bare string; iterating it would register one bogus path per character.
        if isinstance(extra_paths, (str, Path)):
            extra_paths = [extra_paths]
        for extra_path in extra_paths:
            PATH_KEY_ZIP[Path(extra_path)] = key
106+
107+
108+
def get_index_config(path: Path) -> pd.DataFrame:
    """Load the config index for the folder that contains `path`.

    The index lists the config filename `config_fn` together with the
    `task_id` or `optimizer_id` of every `*.yaml` file found below
    `path.parent`. It is stored as a CSV file in the user cache directory of
    `carps`; the cache file name contains the folder and a hash of the config
    files, and the index is only rebuilt when no matching cache file exists.
    Replaces the old indexing API, which wrote `index.csv` next to the configs.

    Parameters
    ----------
    path : Path
        Location where the old index file would have been. Only its parent
        folder is used.

    Returns
    -------
    pd.DataFrame
        The index, read from the cached CSV file.

    Raises
    ------
    KeyError
        If the index has to be built and `path.parent` is not registered in
        `PATH_KEY_ZIP`.
    """
    config_dir = path.parent
    paths = list(config_dir.glob("**/*.yaml"))
    paths_hash = hash_inputs(paths)[:12]

    # Replace everything that is not safe in a file name on all platforms
    # (path separators, the colon of a Windows drive, ...) instead of only "/".
    path_dashed = "".join(char if char.isalnum() or char in "._-" else "-" for char in str(config_dir))

    cache_path = Path(user_cache_dir("carps")) / f"index-{path_dashed}-{paths_hash}.csv"
    if not cache_path.is_file():
        # Look the key up only on a cache miss, so an existing cache file can
        # still be served for a folder that is not registered in this process.
        try:
            key = PATH_KEY_ZIP[config_dir]
        except KeyError:
            raise KeyError(
                f"Config folder {config_dir} is not registered. Known folders: "
                f"{[str(p) for p in PATH_KEY_ZIP]}. Register custom folders with `register_extra_paths`."
            ) from None
        cache_path.parent.mkdir(exist_ok=True, parents=True)
        create_table(key, paths, cache_path)

    return pd.read_csv(cache_path)
132+
133+
62134
if __name__ == "__main__":
63135
fire.Fire(index_configs)

carps/utils/overridefinder.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
from pathlib import Path
77

88
import fire
9-
import pandas as pd
109

11-
from carps.utils.index_configs import index_configs
10+
from carps.utils.index_configs import get_index_config
1211
from carps.utils.loggingutils import get_logger
1312

1413
logger = get_logger(__file__)
@@ -45,9 +44,7 @@ def find_override(task_id: str | None = None, optimizer_id: str | None = None) -
4544
raise ValueError("Please specify either `task_id` or `optimizer_id`.")
4645

4746
index_fn = path / "index.csv"
48-
if not index_fn.is_file():
49-
index_configs()
50-
table = pd.read_csv(index_fn)
47+
table = get_index_config(index_fn)
5148

5249
try:
5350
config_fn = table["config_fn"][table[key] == to_find].to_numpy()[0]

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ dev = [
6565
"pre-commit",
6666
"ruff",
6767
"mypy",
68+
"types-Deprecated",
6869
"mkdocs",
6970
"mkdocs-material",
7071
"mkdocs-autorefs",

subselection/create_subset_configs.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ def write_subsets(subset_fn: str, identifier: str):
2222
task_ids = subset["task_id"].to_list()
2323

2424
index_fn = config_target_path.parent.parent / "index.csv"
25-
if not index_fn.is_file():
26-
raise ValueError(f"Could not find {index_fn}. ObjectiveFunction ids have not been indexed. Run `python -m carps.utils.index_configs`.")
27-
task_index = pd.read_csv(index_fn)
25+
task_index = get_index_config(index_fn)
2826
print(task_index.head())
2927
print(task_ids)
3028
not_found = [pid for pid in task_ids if pid not in task_index["task_id"].to_list()]

0 commit comments

Comments
 (0)