55
66# Run with `time python find_data.py > out_find_data.txt`
77
# HPSS path prefixes searched for each simulation. They are grouped by origin
# so later reporting can tell the groups apart.

# Standard HPSS `/home/projects/e3sm/www/WaterCycle/E3SMv1/` paths from
# https://docs.e3sm.org/e3sm_data_docs/_build/html/v1/WaterCycle/simulation_data/simulation_table.html
CENTRALIZED_PREFIXES: List[str] = [
    "/home/projects/e3sm/www/WaterCycle/E3SMv1/LR/",
    "/home/projects/e3sm/www/WaterCycle/E3SMv1/HR/",
]
# User-specific paths from
# https://e3sm.atlassian.net/wiki/spaces/ED/pages/4495441922/V1+Simulation+backfill+WIP
USER_PREFIXES: List[str] = [
    "/home/b/beharrop/E3SM_simulations/",
    "/home/c/chengzhu/",
    "/home/d/dcomeau/cryosphere_simulations/",
    "/home/g/golaz/2018/E3SM_simulations/",
    "/home/g/golaz/2018/E3SM_simulations/repaired/",
    "/home/g/golaz/2019/E3SM_simulations/",
    "/home/g/golaz/2019/E3SM_simulations/repaired/",
    "/home/j/jinyun/CBGCv1/",
    "/home/j/jonbob/",
    "/home/m/maltrud/E3SM/",
    "/home/n/ndk/2019/",
    "/home/n/ndk/2020/",
    "/home/n/ndk/2021/",
    # NOTE: the trailing comma matters. Without it Python silently
    # concatenates this literal with the next one into a single bogus prefix.
    "/home/projects/m3412/",
    "/home/s/shix/E3SM/",
    "/home/t/tang30/2018/E3SM_simulations/",
    "/home/t/tang30/2018/E3SM_simulations/repaired/",
    "/home/t/tang30/2019/E3SM_simulations/",
    "/home/t/tang30/2019/E3SM_simulations/repaired/",
    "/home/q/qzhu/CBGCv1/",
    "/home/z/zshaheen/2018/E3SM_simulations/",
    "/home/z/zshaheen/2018/E3SM_simulations/repaired/",
]
# Publication paths `/home/projects/e3sm/www/publication-archives/` noted in
# https://github.com/E3SM-Project/e3sm_data_docs/pull/59#issuecomment-3063668732
PUBLICATION_PREFIXES: List[str] = [
    "/home/projects/e3sm/www/publication-archives/pub_archive_E3SM_1_0_",
]
45+
846class DataPathsContainer (object ):
947 def __init__ (self , simulation_name : str ):
1048 self .simulation_name : str = simulation_name
@@ -39,55 +77,15 @@ def read_simulations(csv_file: str) -> List[DataPathsContainer]:
3977 return containers
4078
def search_for_all_appearances(containers: List[DataPathsContainer]):
    """Try every known HPSS prefix against every simulation container.

    Prints a one-line summary of how many (prefix, simulation) combinations
    will be tried, then calls ``add_hpss_paths`` once per combination. For
    each container the prefixes are visited in the order centralized, then
    user-specific, then publication.

    Args:
        containers: Simulation containers to pass to ``add_hpss_paths``.
    """
    # Keep the groups in a fixed order so the per-container visit order is stable.
    prefix_groups = (CENTRALIZED_PREFIXES, USER_PREFIXES, PUBLICATION_PREFIXES)
    num_prefixes: int = sum(len(group) for group in prefix_groups)
    num_simulations: int = len(containers)
    print(f"Trying {num_prefixes} paths for {num_simulations} simulations ({num_prefixes * num_simulations} combinations)")
    for container in containers:
        for group in prefix_groups:
            for prefix in group:
                add_hpss_paths(container, prefix)
9290
9391def add_hpss_paths (container : DataPathsContainer , hpss_prefix : str ):
@@ -114,13 +112,41 @@ def construct_markdown_table(containers: List[DataPathsContainer]):
114112 output_file : str = "out_simulation_paths.md"
115113 with open (output_file , "w" ) as f :
116114 f .write ("# Data Paths\n \n " )
117- f .write ("| Simulation name | Real paths | Symlink paths | Num hard copies | Num total copies |\n " )
118- f .write ("| --- | --- | --- | --- | --- |\n " )
115+ f .write ("| Simulation name | Physically present in this many places | Accessible from this many places | Physically in publication archives? | Accessible from centralized location? | Real paths | Symlink paths |\n " )
116+ f .write ("| --- | --- | --- | --- | --- | --- | --- | \n " )
119117 for container in containers :
118+ physically_in_publication_archive : bool = False
119+ accessible_from_central_location : bool = False
120120 symlinks : List [str ] = []
121121 for (src , dst ) in container .symlink_paths :
122122 symlinks .append (f"{ src } -> { dst } " )
123- f .write (f"| { container .simulation_name } | { container .real_paths } | { symlinks } | { container .get_num_hard_copies ()} | { container .get_num_total_copies ()} | \n " )
123+ if not physically_in_publication_archive :
124+ physically_in_publication_archive = check_for_prefix (dst , PUBLICATION_PREFIXES )
125+ if not accessible_from_central_location :
126+ accessible_from_central_location = check_for_prefix (src , CENTRALIZED_PREFIXES )
127+ if not physically_in_publication_archive :
128+ for path in container .real_paths :
129+ physically_in_publication_archive = check_for_prefix (path , PUBLICATION_PREFIXES )
130+ if physically_in_publication_archive :
131+ break
132+ if not accessible_from_central_location :
133+ for path in container .real_paths :
134+ accessible_from_central_location = check_for_prefix (path , CENTRALIZED_PREFIXES )
135+ if accessible_from_central_location :
136+ break
137+ f .write (f"| { container .simulation_name } | { container .get_num_hard_copies ()} | { container .get_num_total_copies ()} | { bool2str (physically_in_publication_archive )} | { bool2str (accessible_from_central_location )} | { container .real_paths } | { symlinks } |\n " )
138+
def check_for_prefix(path: str, prefix_list: List[str]) -> bool:
    """Return whether ``path`` starts with any of the given prefixes.

    Args:
        path: The path string to test.
        prefix_list: Candidate prefixes; an empty list never matches.

    Returns:
        True if ``path`` starts with at least one entry of ``prefix_list``,
        otherwise False.
    """
    return any(path.startswith(prefix) for prefix in prefix_list)
144+
def bool2str(val: bool) -> str:
    """Render a flag for a table cell: "Yes" when truthy, otherwise empty."""
    return "Yes" if val else ""
124150
125151if __name__ == "__main__" :
126152 containers : List [DataPathsContainer ] = read_simulations ("input/simulations_v1_water_cycle.csv" )
0 commit comments