11import datetime
22import numpy as np
33import pytz
4+ import pandas as pd
45import matplotlib .pyplot as plt
56plt .rcParams .update ({'font.size' : 14 })
67from pathlib import Path
@@ -71,14 +72,7 @@ def comp_plot(glider, ctd):
7172 return fig , ax
7273
7374
74- e = init_erddap ()
75- e .dataset_id = "ctd_deployment"
76- df_ctd = e .to_xarray ().drop_dims ("timeseries" ).to_pandas ()
77- df_ctd .index = df_ctd ["time" ]
78- df_ctd = df_ctd .sort_index ()
79-
80-
81- def nearby_ctd (ds_glider , comparison_plots = False , max_dist = 0.5 , max_days = 2 , num_dives = 5 ):
75+ def nearby_ctd (df_ctd , ds_glider , comparison_plots = False , max_dist = 0.5 , max_days = 2 , num_dives = 5 ):
8276
8377 name = f'SEA0{ ds_glider .attrs ["glider_serial" ]} _M{ ds_glider .attrs ["deployment_id" ]} '
8478 df_glider = ds_glider .to_pandas ()
@@ -130,6 +124,7 @@ def nearby_ctd(ds_glider, comparison_plots=False, max_dist=0.5, max_days=2, num_
130124
131125
132126def download_glider_datasets (dataset_ids ):
127+ e = init_erddap ()
133128 dataset_dict = {}
134129 for dataset_id in dataset_ids :
135130 e .dataset_id = dataset_id
@@ -142,21 +137,16 @@ def download_glider_datasets(dataset_ids):
142137 return dataset_dict
143138
144139
145- def recent_ctds ():
146- _log .info ("start to process all CTDs" )
140+ def recent_ctds (df_relevant , df_ctd ):
141+ _log .info ("start to process all recent CTDs" )
147142 ctd_casts = df_ctd .groupby ("cast_no" ).first ()
148- e .dataset_id = "allDatasets"
149- df_datasets = e .to_pandas (parse_dates = ['minTime (UTC)' , 'maxTime (UTC)' ])
150-
151- df_datasets .set_index ("datasetID" , inplace = True )
152- df_datasets .drop ("allDatasets" , inplace = True )
153- df_datasets = df_datasets [df_datasets .index .str [:3 ] == "nrt" ]
154- df_relevant = df_datasets
155143 mintime = ctd_casts .time .min ().replace (tzinfo = pytz .utc ) - datetime .timedelta (days = 1 )
156144 maxtime = ctd_casts .time .max ().replace (tzinfo = pytz .utc ) + datetime .timedelta (days = 1 )
157-
158145 df_relevant = df_relevant [df_relevant ["minTime (UTC)" ] > mintime ]
159146 df_relevant = df_relevant [df_relevant ["maxTime (UTC)" ] < maxtime ]
147+ if df_relevant .empty :
148+ _log .info ("no new ctds to process" )
149+ return
160150 df_relevant ["longitude" ] = (df_relevant ["minLongitude (degrees_east)" ] + df_relevant [
161151 "maxLongitude (degrees_east)" ]) / 2
162152 df_relevant ["latitude" ] = (df_relevant ["minLatitude (degrees_north)" ] + df_relevant [
@@ -172,14 +162,14 @@ def recent_ctds():
172162 df_relevant = df_relevant .sort_values ('minTime (UTC)' )
173163 nrt_dict = download_glider_datasets (df_relevant .index )
174164 i = 0
175- summary_plot (df_relevant , ctd_casts , nrt_dict )
165+ summary_plot (df_relevant , ctd_casts , nrt_dict , df_ctd )
176166 for mission , ds in nrt_dict .items ():
177167 _log .info (f"process: { mission } " )
178168 try :
179- ctds = nearby_ctd (ds , comparison_plots = True , num_dives = 4 )
169+ ctds = nearby_ctd (df_ctd , ds , comparison_plots = True , num_dives = 4 )
180170 except :
181171 _log .warning ("4 dives insufficient. Expanding to 8" )
182- ctds = nearby_ctd (ds , comparison_plots = True , num_dives = 8 )
172+ ctds = nearby_ctd (df_ctd , ds , comparison_plots = True , num_dives = 8 )
183173
184174 found = list (ctds .keys ())
185175 if found == ['deployment' , 'recovery' ]:
@@ -188,17 +178,16 @@ def recent_ctds():
188178 _log .warning (f'{ mission [4 :]} , missing { missing } , { ds .attrs ["basin" ]} ' )
189179 i += 1
190180 _log .info (f"total bad: { i } " )
191- _log .info ("completed process all CTDs" )
192181
193182
194- def summary_plot (df_relevant , ctd_casts , nrt_dict ):
183+ def summary_plot (df_relevant , ctd_casts , nrt_dict , df_ctd ):
195184 colors = plt .rcParams ["axes.prop_cycle" ].by_key ()["color" ]
196185
197186 fig , ax = plt .subplots (figsize = (12 , 8 ))
198187 diff = 0.15
199188 for i , (name , row ) in enumerate (df_relevant .iterrows ()):
200189 ds = nrt_dict [name ]
201- ctds = nearby_ctd (ds )
190+ ctds = nearby_ctd (df_ctd , ds )
202191 found = list (ctds .keys ())
203192 if found == ['deployment' , 'recovery' ]:
204193 continue
@@ -222,4 +211,19 @@ def summary_plot(df_relevant, ctd_casts, nrt_dict):
222211
223212
if __name__ == '__main__':
    # Script entry point: fetch the dataset catalogue and the CTD table once,
    # then hand both to recent_ctds() so the processing functions do not
    # perform their own module-level downloads.
    _log.info("start processing")
    e = init_erddap()

    # Load the "allDatasets" table, index it by dataset ID, drop its
    # self-referencing "allDatasets" row and keep only IDs starting with "nrt".
    e.dataset_id = "allDatasets"
    df_datasets = e.to_pandas(parse_dates=['minTime (UTC)', 'maxTime (UTC)'])
    df_datasets.set_index("datasetID", inplace=True)
    df_datasets.drop("allDatasets", inplace=True)
    df_datasets = df_datasets[df_datasets.index.str[:3] == "nrt"]

    # Keep datasets whose last timestamp falls within the past 60 days.
    # The current time is requested directly in UTC: taking the naive local
    # time and stamping it as UTC would shift the cutoff by the host's UTC
    # offset on machines whose clock is not set to UTC.
    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=60)
    recent_datasets = df_datasets[df_datasets['maxTime (UTC)'] > cutoff]

    # Download the CTD table with a 300 s request timeout, drop the
    # "timeseries" dimension, then index by the "time" column and sort.
    _log.info("download ctds")
    e.dataset_id = "ctd_deployment"
    df_ctd_table = e.to_xarray(requests_kwargs={"timeout": 300}).drop_dims("timeseries").to_pandas()
    df_ctd_table.index = df_ctd_table["time"]
    df_ctd = df_ctd_table.sort_index()

    recent_ctds(recent_datasets, df_ctd)
    _log.info("completed process all CTDs")
229+
0 commit comments