Skip to content

Commit 6793fb9

Browse files
committed
Support specifying files to convert via regex
Users can specify the basename of the files to be converted using a regex via the "--bp-file" option. Only files with basenames that match the regex are converted.
1 parent 3e3e163 commit 6793fb9

File tree

4 files changed: 66 additions and 30 deletions

tools/adios2pio-nm/README_conversion_tool.txt

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,17 @@ EXAMPLES:
1313

1414
# Program options
1515

16-
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe
16+
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --help
1717
Usage : ./tools/adios2pio-nm/adios2pio-nm.exe --[OPTIONAL ARG1 NAME]=[OPTIONAL ARG1 VALUE] --[OPTIONAL ARG2 NAME]=[OPTIONAL ARG2 VALUE] ...
1818
Optional Arguments :
19-
--bp-file: data produced by SCORPIO with ADIOS format
20-
--idir: Directory containing data output from SCORPIO (in ADIOS format)
21-
--nc-file: output file name after conversion
19+
--bp-file: input file (or regex of basename for multiple files) in ADIOS BP format
20+
--idir: Directory containing the input files (in ADIOS BP format)
21+
--nc-file: output file name after conversion (will be used as prefix when converting multiple files)
2222
--pio-format: output SCORPIO I/O type. Supported parameters: "pnetcdf", "netcdf", "netcdf4c", "netcdf4p", "nczarr"
2323
--rearr: SCORPIO rearranger. Supported parameters: "subset", "box", "any". Default "any".
24-
--rm-bp-file-after-conv: remove the ADIOS BP file after conversion
24+
--rm-bp-file-after-conv: remove the ADIOS BP file after conversion (or regex to specify which files to delete after conversion)
2525
--verbose: Turn on verbose info messages
2626

27-
2827
# Delete all ADIOS BP files after conversion
2928
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --bp-file=/home/jayesh/scorpio_build2/tests/general/test_pio_file_simple_tests.testfile.bp --rm-bp-file-after-conv
3029
Converting BP file : "/home/jayesh/scorpio_build2/tests/general/test_pio_file_simple_tests.testfile.bp"
@@ -52,3 +51,16 @@ Converting BP file : "/scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/tes
5251
Deleting BP file : /scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/test_F2010_ne4_oQU240.eam.r.0001-01-02-00000.nc.bp
5352
Converting BP file : "/scratch/jayesh/e3sm/scratch/test_F2010_ne4_oQU240/run/test_F2010_ne4_oQU240.mosart.r.0001-01-02-00000.nc.bp"
5453

54+
# Convert ONLY the coupler output (in ADIOS BP format): specify a regex matching the basenames of the files to convert via the "--bp-file" option
55+
jayesh@compute-386-03:~/scorpio_build2$ ./tools/adios2pio-nm/adios2pio-nm.exe --idir=/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir --bp-file=".*.cpl.*"
56+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mpassi.rst.0001-01-03_00000.nc.bp
57+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mosart.r.0001-01-03-00000.nc.bp
58+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.elm.r.0001-01-03-00000.nc.bp
59+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.rh0.0001-01-03-00000.nc.bp
60+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.r.0001-01-03-00000.nc.bp
61+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.mosart.rh0.0001-01-03-00000.nc.bp
62+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.elm.rh0.0001-01-03-00000.nc.bp
63+
WARNING: Skipping BP file (did not match specified regex): /scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.eam.rs.0001-01-03-00000.nc.bp
64+
Converting BP file : "/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.cpl.hi.0001-01-04-00000.nc.bp"
65+
Converting BP file : "/scratch/jayesh/e3sm/scratch/tmp_adios_bp_run_dir/ERP_Ld3.ne4pg2_oQU480.F2010.anlgce-ub22_gnu.io-force_adios.20240717_210751_7we0ju.cpl.r.0001-01-03-00000.nc.bp"
66+

tools/adios2pio-nm/adios2pio-nm-lib.cxx

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2831,7 +2831,8 @@ static bool IsBPDir(const std::string &bp_dname,
28312831
* corresponding file name prefixes to be used for converted
28322832
* files
28332833
*/
2834-
static int FindBPDirs(const string &bppdir,
2834+
static int FindBPDirs(MPI_Comm comm, const string &bppdir,
2835+
const string &bpdir_name_rgx,
28352836
vector<string> &bpdirs,
28362837
vector<string> &conv_fname_prefixes)
28372838
{
@@ -2842,6 +2843,21 @@ static int FindBPDirs(const string &bppdir,
28422843
return BP2PIO_ERROR;
28432844
}
28442845

2846+
int rank = 0;
2847+
MPI_Comm_rank(comm, &rank);
2848+
#ifdef SPIO_NO_CXX_REGEX
2849+
if(!bpdir_name_rgx.empty())
2850+
{
2851+
if(rank == 0)
2852+
{
2853+
printf("WARNING: C++ regex not supported, ignoring regex for converting files (converting all files)...\n");
2854+
}
2855+
}
2856+
#else
2857+
std::smatch match;
2858+
std::regex rgx((!bpdir_name_rgx.empty()) ? bpdir_name_rgx : ".*");
2859+
#endif
2860+
28452861
struct dirent *pde = NULL;
28462862
while ((pde = readdir(pdir)) != NULL)
28472863
{
@@ -2852,6 +2868,16 @@ static int FindBPDirs(const string &bppdir,
28522868
if ((pde->d_type == DT_DIR) &&
28532869
IsBPDir(dname, dname_prefix))
28542870
{
2871+
#ifndef SPIO_NO_CXX_REGEX
2872+
if(!std::regex_match(dname, match, rgx))
2873+
{
2874+
if(rank == 0)
2875+
{
2876+
printf("WARNING: Skipping BP file (did not match specified regex): %s\n", dname.c_str());
2877+
}
2878+
continue;
2879+
}
2880+
#endif
28552881
conv_fname_prefixes.push_back(dname_prefix);
28562882
const std::string NC_SUFFIX(".nc");
28572883
const std::string BP_SUFFIX(".bp");
@@ -2868,21 +2894,23 @@ static int FindBPDirs(const string &bppdir,
28682894
* bppdir: Directory containing multiple directories, named "*.bp",
28692895
* each directory containing BP files corresponding to a single
28702896
* file. This is the "BP Parent Directory".
2897+
* bpdir_name_rgx : Regex to match BP directories/files that need to be converted
28712898
* piotype: The PIO IO type used for converting BP files to NetCDF using PIO
28722899
* comm: The MPI communicator to be used for conversion
28732900
*
28742901
* The function looks for all directories in bppdir named "*.bp"
28752902
* and converts those matching bpdir_name_rgx (all of them if it is empty or C++ regex is unavailable), one at a time, to NetCDF files
28762903
*/
2877-
int MConvertBPToNC(const string &bppdir, const string &piotype, const string &rearr,
2904+
int MConvertBPToNC(const string &bppdir, const std::string &bpdir_name_rgx,
2905+
const string &piotype, const string &rearr,
28782906
MPI_Comm comm, const std::string &rm_ifname_rgx)
28792907
{
28802908
int ierr = BP2PIO_NOERR;
28812909
vector<string> bpdirs;
28822910
vector<string> conv_fname_prefixes;
28832911
const std::string CONV_FNAME_SUFFIX(".nc");
28842912

2885-
ierr = FindBPDirs(bppdir, bpdirs, conv_fname_prefixes);
2913+
ierr = FindBPDirs(comm, bppdir, bpdir_name_rgx, bpdirs, conv_fname_prefixes);
28862914
if (ierr != BP2PIO_NOERR)
28872915
{
28882916
fprintf(stderr, "Unable to read directory, %s\n", bppdir.c_str());

tools/adios2pio-nm/adios2pio-nm-lib.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ int ConvertBPToNC(const string &infilepath,
1919
const string &outfilename,
2020
const string &piotype, const string &rearr, MPI_Comm comm_in,
2121
const std::string &rm_ifname_rgx);
22-
int MConvertBPToNC(const string &bppdir, const string &piotype, const string &rearr,
23-
MPI_Comm comm, const std::string &rm_ifname_rgx);
22+
int MConvertBPToNC(const string &bppdir, const string &ifname_rgx, const string &piotype,
23+
const string &rearr, MPI_Comm comm, const std::string &rm_ifname_rgx);
2424
void SetDebugOutput(int val);
2525

2626
#endif /* #ifndef _ADIOS2PIO_NM_LIB_H_ */

tools/adios2pio-nm/adios2pio-nm.cxx

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99

1010
static void init_user_options(spio_tool_utils::ArgParser &ap)
1111
{
12-
ap.add_opt("bp-file", "data produced by SCORPIO with ADIOS format")
13-
.add_opt("rm-bp-file-after-conv", "remove the ADIOS BP file after conversion")
14-
.add_opt("idir", "Directory containing data output from SCORPIO (in ADIOS format)")
15-
.add_opt("nc-file", "output file name after conversion")
12+
ap.add_opt("bp-file", "input file (or regex of basename for multiple files) in ADIOS BP format")
13+
.add_opt("rm-bp-file-after-conv", "remove the ADIOS BP file after conversion (or regex to specify which files to delete after conversion)")
14+
.add_opt("idir", "Directory containing the input files (in ADIOS BP format)")
15+
.add_opt("nc-file", "output file name after conversion (will be used as prefix when converting multiple files)")
1616
.add_opt("pio-format", "output SCORPIO I/O type. Supported parameters: \"pnetcdf\", \"netcdf\", \"netcdf4c\", \"netcdf4p\", \"nczarr\"")
1717
.add_opt("rearr", "SCORPIO rearranger. Supported parameters: \"subset\", \"box\", \"any\". Default \"any\".")
1818
.add_opt("verbose", "Turn on verbose info messages");
@@ -52,7 +52,7 @@ static int get_user_options(
5252
spio_tool_utils::ArgParser &ap,
5353
int argc, char *argv[],
5454
std::string &idir,
55-
std::string &ifile, std::string &ofile,
55+
std::string &ifname_rgx, std::string &ofile,
5656
std::string &rm_ifname_rgx,
5757
std::string &otype,
5858
std::string &rearr,
@@ -67,25 +67,20 @@ static int get_user_options(
6767
#else
6868
ap.parse(argc, argv);
6969
#endif
70+
7071
if(!ap.has_arg("bp-file") && !ap.has_arg("idir")){
7172
ap.print_usage(std::cerr);
7273
return 1;
7374
}
75+
7476
if(ap.has_arg("bp-file")){
75-
ifile = ap.get_arg<std::string>("bp-file");
77+
ifname_rgx = ap.get_arg<std::string>("bp-file");
7678
if(ap.has_arg("nc-file")){
7779
ofile = ap.get_arg<std::string>("nc-file");
7880
}
79-
else{
80-
ofile = strip_bp_ext(ifile);
81-
}
82-
if(ofile.size() == 0){
83-
ap.print_usage(std::cerr);
84-
return 1;
85-
}
8681
}
87-
else{
88-
assert(ap.has_arg("idir"));
82+
83+
if(ap.has_arg("idir")){
8984
idir = ap.get_arg<std::string>("idir");
9085
}
9186

@@ -145,11 +140,11 @@ int main(int argc, char *argv[])
145140
init_user_options(ap);
146141

147142
/* Parse the user options */
148-
string idir, infilepath, outfilename, piotype, rearr;
143+
string idir, ifname_rgx, outfilename, piotype, rearr;
149144
string rm_ifname_rgx;
150145
int debug_lvl = 0;
151146
ret = get_user_options(ap, argc, argv,
152-
idir, infilepath, outfilename,
147+
idir, ifname_rgx, outfilename,
153148
rm_ifname_rgx, piotype, rearr, debug_lvl, rank);
154149

155150
if(ret != 0){
@@ -168,10 +163,11 @@ int main(int argc, char *argv[])
168163
SetDebugOutput(debug_lvl);
169164
MPI_Barrier(comm_in);
170165
if(idir.size() == 0){
171-
ret = ConvertBPToNC(infilepath, outfilename, piotype, rearr, comm_in, rm_ifname_rgx);
166+
/* FIXME: Hopefully ifname_rgx is actually a file name - not regex */
167+
ret = ConvertBPToNC(ifname_rgx, outfilename, piotype, rearr, comm_in, rm_ifname_rgx);
172168
}
173169
else{
174-
ret = MConvertBPToNC(idir, piotype, rearr, comm_in, rm_ifname_rgx);
170+
ret = MConvertBPToNC(idir, ifname_rgx, piotype, rearr, comm_in, rm_ifname_rgx);
175171
}
176172
MPI_Barrier(comm_in);
177173

0 commit comments

Comments
 (0)