22
33from __future__ import annotations
44
5+ import hashlib
56from pathlib import Path
67
78import fire
89import pandas as pd
910from omegaconf import OmegaConf
11+ from platformdirs import user_cache_dir
1012from rich .progress import track
1113
1214from carps .utils .loggingutils import get_logger
1315
1416logger = get_logger ("ConfigIndexer" )
1517
18+
1619config_folder = Path (__file__ ).parent .parent / "configs"
1720config_folder_task = config_folder / "task"
1821config_folder_optimizer = config_folder / "optimizer"
1922
23+ PATH_KEY_ZIP = {
24+ config_folder_task : "task_id" ,
25+ config_folder_optimizer : "optimizer_id" ,
26+ }
27+
2028
2129def index_configs (extra_task_paths : list [str ] | None = None , extra_optimizer_paths : list [str ] | None = None ) -> None :
2230 """Index all task and optimizer configs.
@@ -31,22 +39,13 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
3139 extra_optimizer_paths : list[str], optional
3240 Extra paths to custom optimizers, must be a folder containing only optimizer configs.
3341 """
34- config_folder_tasks = [config_folder_task ] if extra_task_paths is None else [config_folder_task , * extra_task_paths ] # type: ignore[list-item]
35- config_folder_tasks = [Path (p ) for p in config_folder_tasks ]
36- config_folder_optimizers = (
37- [config_folder_optimizer ]
38- if extra_optimizer_paths is None
39- else [config_folder_optimizer , * extra_optimizer_paths ] # type: ignore[list-item]
40- )
41- config_folder_optimizers = [Path (p ) for p in config_folder_optimizers ]
42- for key , paths in zip (["task_id" , "optimizer_id" ], [config_folder_tasks , config_folder_optimizers ], strict = False ):
43- logger .info (f"Search configs for { key } from { paths } ..." )
44- filenames = []
45- for path in paths :
46- filenames .extend (list (path .glob ("**/*.yaml" )))
42+ register_extra_paths (extra_task_paths , extra_optimizer_paths )
43+
44+ for path , key in PATH_KEY_ZIP .items ():
45+ paths = list (path .glob ("**/*.yaml" ))
4746
4847 table_list = []
49- for fn in track (filenames , total = len (filenames ), description = f"Gathering for { key } ..." ):
48+ for fn in track (paths , total = len (paths ), description = f"Gathering for { key } ..." ):
5049 cfg = OmegaConf .load (fn )
5150 value = cfg .get (key )
5251 table_list .append (
@@ -59,5 +58,78 @@ def index_configs(extra_task_paths: list[str] | None = None, extra_optimizer_pat
5958 table .to_csv (paths [0 ] / "index.csv" , index = False )
6059
6160
def create_table(key: str, paths: list[Path], target: Path) -> None:
    """Create an index table and write it to ``target`` as CSV.

    Every file in ``paths`` is loaded with ``OmegaConf.load`` and contributes
    one row holding the file name and the value read via ``cfg.get(key)``.

    Parameters
    ----------
    key : str
        Name of the config entry to index (the callers in this module use
        ``"task_id"`` or ``"optimizer_id"``). It is also used as the name of
        the second CSV column.
    paths : list[Path]
        Config files to load.
    target : Path
        Destination of the CSV file. The parent directory is not created here.
    """
    rows = [{"config_fn": str(p), key: OmegaConf.load(p).get(key)} for p in paths]
    # Pin the columns explicitly: without them an empty ``paths`` list yields a
    # header-less CSV that ``pd.read_csv`` rejects with ``EmptyDataError``.
    table = pd.DataFrame(rows, columns=["config_fn", key])
    table.to_csv(target, index=False)
75+
76+
def hash_inputs(paths: list[Path]) -> str:
    """Hash the given files so that an index built from them can be cached.

    The digest covers, for every file in sorted order, its path (as given) and
    a digest of its content. Hashing the path as well as the content means that
    renaming a file, adding an empty file, or moving bytes from one file to the
    next all change the result. The index stores file names, so each of these
    events must invalidate a cached index.

    Parameters
    ----------
    paths : list[Path]
        Files to hash. The order of the list does not matter.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest (64 characters).
    """
    hasher = hashlib.sha256()
    for path in sorted(paths):
        file_hasher = hashlib.sha256()
        with open(path, "rb") as f:
            # Read in 16 MiB chunks so large files are never fully in memory.
            while chunk := f.read(16 * 1024 * 1024):
                file_hasher.update(chunk)
        # Path, NUL terminator, then a fixed-size content digest: the record
        # boundaries are unambiguous regardless of what the files contain.
        hasher.update(str(path).encode("utf-8", "surrogateescape"))
        hasher.update(b"\0")
        hasher.update(file_hasher.digest())
    return hasher.hexdigest()
85+
86+
def register_extra_paths(extra_task_paths: list[str] | None, extra_optimizer_paths: list[str] | None) -> None:
    """Register extra task and optimizer config folders in ``PATH_KEY_ZIP``.

    Each folder is stored as a ``Path`` key mapped to the id key of its configs
    (``"task_id"`` or ``"optimizer_id"``). The module-level dict is mutated in
    place, so registrations accumulate across calls.

    Parameters
    ----------
    extra_task_paths : list[str] | None
        Extra paths to custom tasks, must be a folder containing only task configs.
        A single string or ``Path`` is accepted and treated as a one-element list.
    extra_optimizer_paths : list[str] | None
        Extra paths to custom optimizers, must be a folder containing only optimizer configs.
        A single string or ``Path`` is accepted and treated as a one-element list.
    """

    def _as_list(value):
        # ``None`` and empty values mean "nothing to register".
        if not value:
            return []
        # A bare string (e.g. a single value passed on the command line) would
        # otherwise be iterated character by character, registering "/" and
        # single letters as config folders.
        if isinstance(value, (str, Path)):
            return [value]
        return list(value)

    # Optimizers first, then tasks: a folder given for both ends up as "task_id".
    for optimizer_path_str in _as_list(extra_optimizer_paths):
        PATH_KEY_ZIP[Path(optimizer_path_str)] = "optimizer_id"
    for task_path_str in _as_list(extra_task_paths):
        PATH_KEY_ZIP[Path(task_path_str)] = "task_id"
106+
107+
def get_index_config(path: Path) -> pd.DataFrame:
    """Return the config index for the folder containing ``path``, using a cache.

    The index lists every ``*.yaml`` file found recursively below
    ``path.parent`` with its file name (``config_fn``) and the id registered
    for that folder in ``PATH_KEY_ZIP``. Instead of writing ``index.csv`` next
    to the configs, the table is stored in the directory returned by
    ``user_cache_dir("carps")``. The cache file name contains the folder name
    and a hash of the config files, so a changed set of configs results in a
    new cache file.

    Parameters
    ----------
    path : Path
        Location where the old index file would have been. Only its parent
        folder is used.

    Returns
    -------
    pd.DataFrame
        The index table read back from the cache file.

    Raises
    ------
    KeyError
        If ``path.parent`` is not a key of ``PATH_KEY_ZIP`` and no cache file
        exists yet.
    """
    config_dir = path.parent
    # Flatten the folder into a single file-name component. Besides "/", the
    # Windows separator "\\" and the drive colon must go, otherwise the cache
    # name turns into a nested or invalid path on that platform.
    path_dashed = str(config_dir).translate(str.maketrans({"/": "-", "\\": "-", ":": "-"}))

    # Sorted so that the row order of a freshly built index is deterministic.
    paths = sorted(config_dir.glob("**/*.yaml"))
    paths_hash = hash_inputs(paths)[:12]

    cache_path = Path(user_cache_dir("carps")) / f"index-{path_dashed}-{paths_hash}.csv"
    if not cache_path.is_file():
        cache_path.parent.mkdir(exist_ok=True, parents=True)
        create_table(PATH_KEY_ZIP[config_dir], paths, cache_path)

    return pd.read_csv(cache_path)
132+
133+
62134if __name__ == "__main__" :
63135 fire .Fire (index_configs )
0 commit comments