Skip to content

Commit bd92608

Browse files
Merge pull request #651 from E3SM-Project/jayeshkrishna/conv_tool_files_to_convert_via_rgx
Allow users to specify input files (in ADIOS BP format) via a regular expression. Users can now specify a regular expression for the basename of the input files to be converted via the "--bp-file" command line option (the directory of the input files can be specified via the "--idir" option).
2 parents 181c215 + 6793fb9 commit bd92608

File tree

4 files changed

+175
-157
lines changed

4 files changed

+175
-157
lines changed

tools/adios2pio-nm/README_conversion_tool.txt

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,17 @@ EXAMPLES:
1313

1414
# Program options
1515

16-
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe
16+
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --help
1717
Usage : ./tools/adios2pio-nm/adios2pio-nm.exe --[OPTIONAL ARG1 NAME]=[OPTIONAL ARG1 VALUE] --[OPTIONAL ARG2 NAME]=[OPTIONAL ARG2 VALUE] ...
1818
Optional Arguments :
19-
--bp-file: data produced by SCORPIO with ADIOS format
20-
--idir: Directory containing data output from SCORPIO (in ADIOS format)
21-
--nc-file: output file name after conversion
19+
--bp-file: input file (or regex of basename for multiple files) in ADIOS BP format
20+
--idir: Directory containing the input files (in ADIOS BP format)
21+
--nc-file: output file name after conversion (will be used as prefix when converting multiple files)
2222
--pio-format: output SCORPIO I/O type. Supported parameters: "pnetcdf", "netcdf", "netcdf4c", "netcdf4p", "nczarr"
2323
--rearr: SCORPIO rearranger. Supported parameters: "subset", "box", "any". Default "any".
24-
--rm-bp-file-after-conv: remove the ADIOS BP file after conversion
24+
--rm-bp-file-after-conv: remove the ADIOS BP file after conversion (or regex to specify which files to delete after conversion)
2525
--verbose: Turn on verbose info messages
2626

27-
2827
# Delete all ADIOS BP files after conversion
2928
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --bp-file=/home/jayesh/scorpio_build2/tests/general/test_pio_file_simple_tests.testfile.bp --rm-bp-file-after-conv
3029
Converting BP file : "/home/jayesh/scorpio_build2/tests/general/test_pio_file_simple_tests.testfile.bp"
@@ -52,3 +51,16 @@ Converting BP file : "/scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/tes
5251
Deleting BP file : /scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/test_F2010_ne4_oQU240.eam.r.0001-01-02-00000.nc.bp
5352
Converting BP file : "/scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/test_F2010_ne4_oQU240.mosart.r.0001-01-02-00000.nc.bp"
5453

54+
# Convert ONLY coupler output in ADIOS BP format : Specify the regex for files to convert via "--bp-file" option
55+
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --idir=/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir --bp-file=".*.cpl.*"
56+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mpassi.rst.0001-01-03_00000.nc.bp
57+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mosart.r.0001-01-03-00000.nc.bp
58+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.elm.r.0001-01-03-00000.nc.bp
59+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.rh0.0001-01-03-00000.nc.bp
60+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.r.0001-01-03-00000.nc.bp
61+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mosart.rh0.0001-01-03-00000.nc.bp
62+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.elm.rh0.0001-01-03-00000.nc.bp
63+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.rs.0001-01-03-00000.nc.bp
64+
Converting BP file : "/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.cpl.hi.0001-01-04-00000.nc.bp"
65+
Converting BP file : "/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.cpl.r.0001-01-03-00000.nc.bp"
66+

tools/adios2pio-nm/adios2pio-nm-lib.cxx

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2831,7 +2831,8 @@ static bool IsBPDir(const std::string &bp_dname,
28312831
* corresponding file name prefixes to be used for converted
28322832
* files
28332833
*/
2834-
static int FindBPDirs(const string &bppdir,
2834+
static int FindBPDirs(MPI_Comm comm, const string &bppdir,
2835+
const string &bpdir_name_rgx,
28352836
vector<string> &bpdirs,
28362837
vector<string> &conv_fname_prefixes)
28372838
{
@@ -2842,6 +2843,21 @@ static int FindBPDirs(const string &bppdir,
28422843
return BP2PIO_ERROR;
28432844
}
28442845

2846+
int rank = 0;
2847+
MPI_Comm_rank(comm, &rank);
2848+
#ifdef SPIO_NO_CXX_REGEX
2849+
if(!bpdir_name_rgx.empty())
2850+
{
2851+
if(rank == 0)
2852+
{
2853+
printf("WARNING: C++ regex not supported, ignoring regex for converting files (converting all files)...\n");
2854+
}
2855+
}
2856+
#else
2857+
std::smatch match;
2858+
std::regex rgx((!bpdir_name_rgx.empty()) ? bpdir_name_rgx : ".*");
2859+
#endif
2860+
28452861
struct dirent *pde = NULL;
28462862
while ((pde = readdir(pdir)) != NULL)
28472863
{
@@ -2852,6 +2868,16 @@ static int FindBPDirs(const string &bppdir,
28522868
if ((pde->d_type == DT_DIR) &&
28532869
IsBPDir(dname, dname_prefix))
28542870
{
2871+
#ifndef SPIO_NO_CXX_REGEX
2872+
if(!std::regex_match(dname, match, rgx))
2873+
{
2874+
if(rank == 0)
2875+
{
2876+
printf("WARNING: Skipping BP file (did not match specified regex): %s\n", dname.c_str());
2877+
}
2878+
continue;
2879+
}
2880+
#endif
28552881
conv_fname_prefixes.push_back(dname_prefix);
28562882
const std::string NC_SUFFIX(".nc");
28572883
const std::string BP_SUFFIX(".bp");
@@ -2868,21 +2894,23 @@ static int FindBPDirs(const string &bppdir,
28682894
* bppdir: Directory containing multiple directories, named "*.bp",
28692895
* each directory containing BP files corresponding to a single
28702896
* file. This is the "BP Parent Directory".
2897+
* bpdir_name_rgx : Regex to match BP directories/files that need to be converted
28712898
* piotype: The PIO IO type used for converting BP files to NetCDF using PIO
28722899
* comm: The MPI communicator to be used for conversion
28732900
*
28742901
* The function looks for all directories in bppdir named "*.bp"
28752902
* and converts them, one at a time, to NetCDF files
28762903
*/
2877-
int MConvertBPToNC(const string &bppdir, const string &piotype, const string &rearr,
2904+
int MConvertBPToNC(const string &bppdir, const std::string &bpdir_name_rgx,
2905+
const string &piotype, const string &rearr,
28782906
MPI_Comm comm, const std::string &rm_ifname_rgx)
28792907
{
28802908
int ierr = BP2PIO_NOERR;
28812909
vector<string> bpdirs;
28822910
vector<string> conv_fname_prefixes;
28832911
const std::string CONV_FNAME_SUFFIX(".nc");
28842912

2885-
ierr = FindBPDirs(bppdir, bpdirs, conv_fname_prefixes);
2913+
ierr = FindBPDirs(comm, bppdir, bpdir_name_rgx, bpdirs, conv_fname_prefixes);
28862914
if (ierr != BP2PIO_NOERR)
28872915
{
28882916
fprintf(stderr, "Unable to read directory, %s\n", bppdir.c_str());

tools/adios2pio-nm/adios2pio-nm-lib.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ int ConvertBPToNC(const string &infilepath,
1919
const string &outfilename,
2020
const string &piotype, const string &rearr, MPI_Comm comm_in,
2121
const std::string &rm_ifname_rgx);
22-
int MConvertBPToNC(const string &bppdir, const string &piotype, const string &rearr,
23-
MPI_Comm comm, const std::string &rm_ifname_rgx);
22+
int MConvertBPToNC(const string &bppdir, const string &ifname_rgx, const string &piotype,
23+
const string &rearr, MPI_Comm comm, const std::string &rm_ifname_rgx);
2424
void SetDebugOutput(int val);
2525

2626
#endif /* #ifndef _ADIOS2PIO_NM_LIB_H_ */

0 commit comments

Comments
 (0)