11import os
22import re
3- from typing import List , Tuple
3+ from typing import List , Tuple , Union
44
55import numpy as np
66import pandas as pd
@@ -341,32 +341,78 @@ def get_case_percentage_from_file(self, output_log_file: str) -> str:
341341
342342 return "0 %" # if no progress is found
343343
def monitor_cases(
    self, value_counts: Union[str, None] = None
) -> Union[pd.DataFrame, dict]:
    """
    Monitor the cases and report the status of each one.

    For every directory in ``self.cases_dirs`` the status is resolved in
    this order (first match wins):

    - ``"Errfile"`` if a file named ``Errfile`` exists in the case directory.
    - ``"END"`` if a file named ``norm_end`` exists in the case directory.
    - ``"No run.tab"`` if the case directory has no ``run.tab`` file.
    - ``"NaN"`` if ``run.tab``, as read by ``self._read_tabfile``, contains
      any null value.
    - Otherwise, whatever ``self.get_case_percentage_from_file`` returns for
      the ``wrapper_out.log`` file of the case.

    Parameters
    ----------
    value_counts : str, optional
        The kind of aggregation to return.
        If "simple", it returns a dictionary mapping each status to the
        number of cases in that status.
        Any other truthy value (documented value: "cases") returns a
        dictionary mapping each status to the list of case names in it.
        Default is None, which returns the full DataFrame.

    Returns
    -------
    Union[pd.DataFrame, dict]
        A DataFrame with columns "Case" and "Percentage" when
        ``value_counts`` is falsy, otherwise a dictionary with the
        aggregated info, ordered from most to least frequent status.
    """

    cases_percentage = {}

    for case_dir in self.cases_dirs:
        # normpath drops a trailing separator; without it basename returns ""
        # and every such case would overwrite the same dictionary key.
        case_dir_name = os.path.basename(os.path.normpath(case_dir))
        run_tab_file = os.path.join(case_dir, "run.tab")

        if os.path.exists(os.path.join(case_dir, "Errfile")):
            status = "Errfile"
        elif os.path.exists(os.path.join(case_dir, "norm_end")):
            status = "END"
        elif not os.path.exists(run_tab_file):
            status = "No run.tab"
        elif self._read_tabfile(file_path=run_tab_file).isnull().values.any():
            status = "NaN"
        else:
            status = self.get_case_percentage_from_file(
                output_log_file=os.path.join(case_dir, "wrapper_out.log")
            )
        cases_percentage[case_dir_name] = status

    if not value_counts:
        return pd.DataFrame(
            list(cases_percentage.items()), columns=["Case", "Percentage"]
        )

    # Group case names by status using plain Python containers, so the
    # result is a real dict and does not depend on the index layout that
    # pandas uses for value counts.
    cases_by_status = {}
    for case_name, status in cases_percentage.items():
        cases_by_status.setdefault(status, []).append(case_name)

    # Most frequent status first (stable sort keeps first-seen order on ties).
    cases_by_status = dict(
        sorted(
            cases_by_status.items(),
            key=lambda item: len(item[1]),
            reverse=True,
        )
    )

    if value_counts == "simple":
        return {status: len(names) for status, names in cases_by_status.items()}
    return cases_by_status
364409
365410 def postprocess_case (
366411 self ,
367412 case_num : int ,
368413 case_dir : str ,
369414 output_vars : List [str ] = None ,
415+ force : bool = False ,
370416 remove_tab : bool = False ,
371417 remove_nc : bool = False ,
372418 ) -> xr .Dataset :
@@ -379,6 +425,8 @@ def postprocess_case(
379425 The case number.
380426 case_dir : str
381427 The case directory.
428+ force : bool, optional
429+ Force the postprocessing, re-creating the output.nc file. Default is False.
382430 output_vars : list, optional
383431 The output variables to postprocess. Default is None.
384432 remove_tab : bool, optional
@@ -403,13 +451,15 @@ def postprocess_case(
403451 output_vars = list (self .postprocess_functions .keys ())
404452
405453 output_nc_path = os .path .join (case_dir , "output.nc" )
406- if not os .path .exists (output_nc_path ):
454+ if not os .path .exists (output_nc_path ) or force :
407455 # Convert tab files to netCDF file
408456 output_path = os .path .join (case_dir , "output.tab" )
409457 run_path = os .path .join (case_dir , "run.tab" )
410458 output_nc = self ._convert_case_output_files_to_nc (
411459 case_num = case_num , output_path = output_path , run_path = run_path
412460 )
461+ if os .path .exists (output_nc_path ):
462+ os .remove (output_nc_path )
413463 output_nc .to_netcdf (output_nc_path )
414464 else :
415465 self .logger .info ("Reading existing output.nc file." )
@@ -432,7 +482,10 @@ def postprocess_case(
432482 ds = xr .merge (var_ds_list , compat = "no_conflicts" )
433483
434484 # Save Dataset to netCDF file
435- ds .to_netcdf (os .path .join (case_dir , "output_postprocessed.nc" ))
485+ processed_nc_path = os .path .join (case_dir , "output_postprocessed.nc" )
486+ if os .path .exists (processed_nc_path ):
487+ os .remove (processed_nc_path )
488+ ds .to_netcdf (processed_nc_path )
436489
437490 # Remove raw files to save space
438491 if remove_tab :
0 commit comments