3737import glob
3838
3939from collections import OrderedDict
40+ from functools import partial
4041
4142import numpy as np
4243import pandas as pd
4344from netCDF4 import Dataset
4445
4546
def component_file_instance(component, case_file):
    """Return the instance number embedded in a component history file name.

    The file name is searched for ``<component>_<digits>`` and the digits are
    returned as an integer (leading zeros are dropped by ``int``).

    Parameters
    ----------
    component : str
        Component name that prefixes the instance number; matched literally.
    case_file : str
        File name or path to search.

    Returns
    -------
    int
        The instance number found after ``<component>_``.

    Raises
    ------
    AttributeError
        If ``case_file`` contains no ``<component>_<digits>`` token
        (``re.search`` returns ``None``).
    """
    # Escape the component so regex metacharacters in it are matched literally,
    # and capture the digits directly instead of stripping the prefix afterwards.
    search_regex = r"{c}_([0-9]+)".format(c=re.escape(component))
    return int(re.search(search_regex, case_file).group(1))
5051
5152
def file_date_str(case_file, style="short", hist_name="h0"):
    """Extract the date portion of a history file name.

    The file name is searched for ``<hist_name>.<date>.nc`` and ``<date>`` is
    returned. The number of dash-separated numeric fields expected in the date
    depends on ``style``.

    Parameters
    ----------
    case_file : str
        File name or path to search.
    style : str
        ``"full"`` expects four numeric fields, ``"med"`` three and
        ``"short"`` two. Any other value falls back to ``"short"``.
    hist_name : str
        History stream name preceding the date; matched literally.

    Returns
    -------
    str
        The date string with the ``<hist_name>.`` prefix and ``.nc`` suffix
        removed.

    Raises
    ------
    AttributeError
        If no matching token is found in ``case_file`` (``re.search`` returns
        ``None``).
    """
    date_patterns = {
        "full": r"[0-9]+-[0-9]+-[0-9]+-[0-9]+",
        "med": r"[0-9]+-[0-9]+-[0-9]+",
        "short": r"[0-9]+-[0-9]+",
    }
    # Unknown styles silently use the short pattern, as before.
    date_regex = date_patterns.get(style, date_patterns["short"])

    # Both dots are escaped so they only match a literal "."; the date is
    # captured directly rather than stripping prefix/suffix from the match.
    search_regex = r"{h}\.({d})\.nc".format(h=re.escape(hist_name), d=date_regex)
    return re.search(search_regex, case_file).group(1)
65+
6266
def component_monthly_files(dir_, component, ninst, hist_name="h0", nmonth_max=12, date_style="short"):
    """Collect a component's history files per ensemble instance.

    Files in ``dir_`` matching ``*<component>_????.<hist_name>.<date>.nc`` are
    grouped by instance number, ordered by the date in the file name, and
    trimmed to the last ``nmonth_max`` entries of each instance.

    Parameters
    ----------
    dir_ : str
        Directory to search.
    component : str
        Component name used in the file names.
    ninst : int
        Number of instances; keys ``1..ninst`` are always present in the result.
    hist_name : str
        History stream name used in the file names.
    nmonth_max : int
        Maximum number of files kept per instance (the latest ones by date).
    date_style : str
        ``"full"``, ``"med"`` or anything else (treated as ``"short"``);
        selects both the glob pattern and the date parsing style.

    Returns
    -------
    collections.OrderedDict
        Maps instance number to a date-sorted list of file paths.
    """
    # The glob date pattern must agree with the style passed to file_date_str
    # below, otherwise the files found cannot be parsed for sorting.
    # NOTE(review): the "full" glob allows only two characters in the last
    # field while file_date_str accepts any number of digits there -- confirm
    # against real file names.
    if date_style == "full":
        date_search = "????-??-??-??"
    elif date_style == "med":
        date_search = "????-??-??"
    else:
        date_search = "????-??"

    base = "{d}/*{c}_????.{n}.{ds}.nc".format(d=dir_, c=component, n=hist_name, ds=date_search)
    search = os.path.normpath(base)
    result = sorted(glob.glob(search))

    # Parse each file's instance number once instead of once per instance.
    files_by_instance = {}
    for file_name in result:
        files_by_instance.setdefault(component_file_instance(component, file_name), []).append(file_name)

    _file_date_str = partial(file_date_str, style=date_style, hist_name=hist_name)
    instance_files = OrderedDict()
    for ii in range(1, ninst + 1):
        files = sorted(files_by_instance.get(ii, []), key=_file_date_str)
        if len(files) > nmonth_max:
            # Slice from an explicit start index so nmonth_max == 0 yields an
            # empty list (files[-0:] would return everything).
            files = files[len(files) - nmonth_max:]
        instance_files[ii] = files

    return instance_files
7791
7892
def get_variable_meta(dataset, var_name):
    """Build display strings for a variable's ``long_name`` and ``units``.

    Parameters
    ----------
    dataset : object
        Object exposing ``variables[var_name].getncattr(name)``.
    var_name : str
        Name of the variable to look up in ``dataset.variables``.

    Returns
    -------
    dict
        ``{"long_name": ": <long_name>", "units": " [<units>]"}``; an entry is
        the empty string when ``getncattr`` raises ``AttributeError`` for it.
    """
    variable = dataset.variables[var_name]

    def _formatted_attr(attr_name, template):
        # A missing attribute surfaces as AttributeError from getncattr.
        try:
            return template.format(variable.getncattr(attr_name))
        except AttributeError:
            return ""

    return {
        "long_name": _formatted_attr("long_name", ": {}"),
        "units": _formatted_attr("units", " [{}]"),
    }
103+
104+
79105def gather_monthly_averages (ensemble_files , variable_set = None ):
80106 monthly_avgs = []
81107 for case , inst_dict in six .iteritems (ensemble_files ):
@@ -101,16 +127,29 @@ def gather_monthly_averages(ensemble_files, variable_set=None):
101127 continue
102128 else :
103129 m = np .mean (data .variables [var ][0 , ...])
104- try :
105- _name = f": { data .variables [var ].getncattr ('long_name' )} "
106- except AttributeError :
107- _name = ""
108- try :
109- _units = f" [{ data .variables [var ].getncattr ('units' )} ]"
110- except AttributeError :
111- _units = ""
112- desc = f"{ _name } { _units } "
130+
131+ desc = "{long_name}{units}" .format (** get_variable_meta (data , var ))
113132 monthly_avgs .append ((case , var , '{:04}' .format (inst ), date_str , m , desc ))
114133
115134 monthly_avgs = pd .DataFrame (monthly_avgs , columns = ('case' , 'variable' , 'instance' , 'date' , 'monthly_mean' , 'desc' ))
116135 return monthly_avgs
136+
137+
def load_mpas_climatology_ensemble(files, field_name, mask_value=None):
    """Load one field from each file and stack the results along a new last axis.

    Parameters
    ----------
    files : sequence of str
        Paths of the files to read, opened with ``Dataset``; must not be empty.
    field_name : str
        Name of the variable to read from every file.
    mask_value : number, optional
        When given, entries of the stacked array less than this value are
        masked. ``None`` (the default) applies no extra masking.

    Returns
    -------
    dict
        ``{"data": masked array, "desc": str}``. ``data`` has the squeezed
        field's shape plus a trailing axis of length ``len(files)``; ``desc``
        is the long-name/units description taken from the first file.

    Raises
    ------
    IndexError
        If ``files`` is empty.
    """
    if len(files) == 0:
        raise IndexError("load_mpas_climatology_ensemble requires at least one file")

    # The first file fixes the output shape and supplies the description.
    with Dataset(files[0], "r") as dset:
        _field = dset.variables[field_name][:].squeeze()
        var_desc = "{long_name}{units}".format(**get_variable_meta(dset, field_name))

    ens_out = np.ma.zeros([*_field.shape, len(files)])
    ens_out[..., 0] = _field
    for idx, file_name in enumerate(files[1:], start=1):
        with Dataset(file_name, "r") as dset:
            ens_out[..., idx] = dset.variables[field_name][:].squeeze()

    # Compare against None so a threshold of 0 is honoured, not skipped.
    if mask_value is not None:
        ens_out = np.ma.masked_less(ens_out, mask_value)

    return {"data": ens_out, "desc": var_desc}
0 commit comments