22
33from __future__ import annotations
44
5+ import hashlib
56from pathlib import Path
67
78import fire
89import pandas as pd
10+ from deprecated import deprecated
911from omegaconf import OmegaConf
12+ from platformdirs import user_cache_dir
1013from rich .progress import track
1114
1215from carps .utils .loggingutils import get_logger
1316
# Module-level logger used by the indexing helpers in this file.
logger = get_logger("ConfigIndexer")

# Config folders shipped with the package, resolved relative to this file.
config_folder = Path(__file__).parent.parent / "configs"
config_folder_task = config_folder / "task"
config_folder_optimizer = config_folder / "optimizer"

# Maps every known config folder to the id key that is indexed for the configs
# found below it. `register_extra_paths` adds user-supplied folders at runtime.
PATH_KEY_ZIP: dict[Path, str] = {
    config_folder_task: "task_id",
    config_folder_optimizer: "optimizer_id",
}
28+
2029
2130def index_configs (extra_task_paths : list [str ] | None = None , extra_optimizer_paths : list [str ] | None = None ) -> None :
2231 """Index all task and optimizer configs.
@@ -31,22 +40,13 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
3140 extra_optimizer_paths : list[str], optional
3241 Extra paths to custom optimizers, must be a folder containing only optimizer configs.
3342 """
34- config_folder_tasks = [config_folder_task ] if extra_task_paths is None else [config_folder_task , * extra_task_paths ] # type: ignore[list-item]
35- config_folder_tasks = [Path (p ) for p in config_folder_tasks ]
36- config_folder_optimizers = (
37- [config_folder_optimizer ]
38- if extra_optimizer_paths is None
39- else [config_folder_optimizer , * extra_optimizer_paths ] # type: ignore[list-item]
40- )
41- config_folder_optimizers = [Path (p ) for p in config_folder_optimizers ]
42- for key , paths in zip (["task_id" , "optimizer_id" ], [config_folder_tasks , config_folder_optimizers ], strict = False ):
43- logger .info (f"Search configs for { key } from { paths } ..." )
44- filenames = []
45- for path in paths :
46- filenames .extend (list (path .glob ("**/*.yaml" )))
43+ register_extra_paths (extra_task_paths , extra_optimizer_paths )
44+
45+ for path , key in PATH_KEY_ZIP .items ():
46+ paths = list (path .glob ("**/*.yaml" ))
4747
4848 table_list = []
49- for fn in track (filenames , total = len (filenames ), description = f"Gathering for { key } ..." ):
49+ for fn in track (paths , total = len (paths ), description = f"Gathering for { key } ..." ):
5050 cfg = OmegaConf .load (fn )
5151 value = cfg .get (key )
5252 table_list .append (
@@ -59,5 +59,78 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
5959 table .to_csv (paths [0 ] / "index.csv" , index = False )
6060
6161
def create_table(key: str, paths: list[Path], target: Path) -> None:
    """Create an index table and write it to ``target`` as CSV.

    Every config in ``paths`` is loaded with OmegaConf and contributes one row
    holding its filename (column ``config_fn``) and the value stored under
    ``key`` (column named after ``key``).

    Parameters
    ----------
    key : str
        Config entry to index, e.g. ``"task_id"`` or ``"optimizer_id"``.
    paths : list[Path]
        Config files to load. May be empty, in which case a header-only CSV
        is written.
    target : Path
        Destination of the CSV file.
    """
    rows = [{"config_fn": str(p), key: OmegaConf.load(p).get(key)} for p in paths]
    # Pin the columns explicitly: without them an empty `paths` yields a CSV
    # without header, which `pd.read_csv` rejects with `EmptyDataError`.
    table = pd.DataFrame(rows, columns=["config_fn", key])
    table.to_csv(target, index=False)
76+
77+
def hash_inputs(paths: list[Path]) -> str:
    """Hash inputs so that index file can be cached.

    The digest covers both the location and the content of every file, so it
    changes when a file is edited, added, removed, renamed or moved. This
    matters because the cached index stores the file names themselves.

    Parameters
    ----------
    paths : list[Path]
        Files to hash. The order does not matter, the paths are sorted first.

    Returns
    -------
    str
        Hex encoded SHA-256 digest.
    """
    hasher = hashlib.sha256()
    for path in sorted(paths):
        # Hash each file on its own and feed the fixed-size digest into the
        # outer hash. This keeps file boundaries unambiguous: moving bytes
        # from the end of one file to the start of the next changes the result.
        file_hasher = hashlib.sha256()
        with open(path, "rb") as f:
            while chunk := f.read(16 * 1024 * 1024):
                file_hasher.update(chunk)
        # Paths cannot contain NUL, so it safely terminates the name.
        hasher.update(str(path).encode("utf-8", "surrogateescape"))
        hasher.update(b"\0")
        hasher.update(file_hasher.digest())
    return hasher.hexdigest()
86+
87+
def register_extra_paths(extra_task_paths: list[str] | None, extra_optimizer_paths: list[str] | None) -> None:
    """Register extra task and optimizer paths.

    Every given folder is added to the module-level ``PATH_KEY_ZIP`` mapping
    so that it is treated like the built-in task and optimizer config folders.
    Optimizer folders are registered first, so a folder passed in both
    arguments ends up registered as a task folder.

    Parameters
    ----------
    extra_task_paths : list[str] | None
        Extra paths to custom tasks, must be a folder containing only task configs.
        A single path (``str`` or ``Path``) is accepted as well.
    extra_optimizer_paths : list[str] | None
        Extra paths to custom optimizers, must be a folder containing only optimizer configs.
        A single path (``str`` or ``Path``) is accepted as well.
    """
    for extra_paths, key in (
        (extra_optimizer_paths, "optimizer_id"),
        (extra_task_paths, "task_id"),
    ):
        if not extra_paths:
            continue
        # A bare string (e.g. a single value passed on the command line) must
        # count as one folder; iterating it would register every character.
        if isinstance(extra_paths, (str, Path)):
            extra_paths = [extra_paths]
        for path_str in extra_paths:
            PATH_KEY_ZIP[Path(path_str)] = key
107+
108+
def get_index_config(path: Path) -> pd.DataFrame:
    """Return the config index for the folder containing ``path``.

    The index lists the config filename ``config_fn`` and the ``task_id`` or
    ``optimizer_id`` of every ``*.yaml`` file below ``path.parent``.
    Replaces the old indexing api: instead of an ``index.csv`` next to the
    configs, the table is stored in the user cache directory. The cache file
    name contains a hash of the config files, so the table is rebuilt
    whenever that hash changes.

    Parameters
    ----------
    path : Path
        Path where the old index file would have been. Only its parent folder
        is used.

    Returns
    -------
    pd.DataFrame
        The index table.

    Raises
    ------
    KeyError
        If the index has to be built and ``path.parent`` is not a key of
        ``PATH_KEY_ZIP``.
    """
    folder = path.parent
    # Flatten the folder into a single file name component. Besides "/" this
    # also replaces "\\" and ":" so that Windows paths ("C:\\...") are valid.
    folder_slug = str(folder).translate(str.maketrans({"/": "-", "\\": "-", ":": "-"}))

    paths = list(folder.glob("**/*.yaml"))
    paths_hash = hash_inputs(paths)[:12]

    cache_path = Path(user_cache_dir("carps")) / f"index-{folder_slug}-{paths_hash}.csv"
    if not cache_path.is_file():
        try:
            key = PATH_KEY_ZIP[folder]
        except KeyError:
            raise KeyError(
                f"{folder} is not a registered config folder, register it with `register_extra_paths` first."
            ) from None
        cache_path.parent.mkdir(exist_ok=True, parents=True)
        create_table(key, paths, cache_path)

    return pd.read_csv(cache_path)
133+
134+
if __name__ == "__main__":
    # Expose `index_configs` as a command line interface via python-fire.
    fire.Fire(index_configs)
0 commit comments