Skip to content

Commit 450fbfc

Browse files
Merge pull request #636 from E3SM-Project/jayeshkrishna/full_trace_and_replay3
Adding support for SCORPIO API tracing. The replay of the API trace is a work in progress, but this PR contains many software components required for replaying the API trace. This PR includes, * Support for tracing SCORPIO APIs * Scripts/Framework required for replaying the trace is also included. However replaying the trace is not yet complete.
2 parents 0e2cf7a + 292a6f8 commit 450fbfc

39 files changed

+2796
-140
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ option (PIO_USE_FORTRAN_LEGACY_LIB "Use the legacy Fortran library" OFF)
3434
option (PIO_ENABLE_TIMING "Enable the use of the GPTL timing library" ON)
3535
option (PIO_ENABLE_INTERNAL_TIMING "Gather and print GPTL timing stats" OFF)
3636
option (PIO_ENABLE_API_TRACING "Enable tracing of SCORPIO APIs" OFF)
37+
# Only takes effect when PIO_ENABLE_API_TRACING is also ON; with API tracing
# disabled, variable tracing is always disabled (see src/clib/CMakeLists.txt).
option (PIO_ENABLE_API_VAR_TRACING "Enable tracing of variables when tracing SCORPIO APIs" OFF)
3738
option (PIO_ENABLE_IO_STATS "Gather and print I/O performance stats" ON)
3839
option (PIO_ENABLE_LOGGING "Enable debug logging (large output possible)" OFF)
3940
option (PIO_ENABLE_DOC "Enable building SCORPIO documentation" ON)

src/CMakeLists.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,26 @@
1+
#==============================================================================
# Capture build settings for building a separate project (e.g. replay tool)
#==============================================================================
# Each SCORPIO_BUILD_CMAKE_* variable below holds the text of one
# `set(<CMAKE_VAR> "<value>" CACHE <type> "<doc>" FORCE)` command (terminated
# by a newline) that records the value of <CMAKE_VAR> used in this build.
set(SCORPIO_BUILD_CACHE_FILE_BANNER "#CMAKE_PROJECT_NAME=SCORPIO\n")
set(SCORPIO_BUILD_CMAKE_CXX_COMPILER "set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"The CXX Compiler used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_CXX_FLAGS "set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\" CACHE STRING \"The CXX Compiler flags used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_C_COMPILER "set(CMAKE_C_COMPILER \"${CMAKE_C_COMPILER}\" CACHE FILEPATH \"The C Compiler used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_C_FLAGS "set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\" CACHE STRING \"The C Compiler flags used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_EXE_LINKER_FLAGS "set(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\" CACHE STRING \"The Linker flags used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_LINKER "set(CMAKE_LINKER \"${CMAKE_LINKER}\" CACHE FILEPATH \"The Linker used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_MAKE_PROGRAM "set(CMAKE_MAKE_PROGRAM \"${CMAKE_MAKE_PROGRAM}\" CACHE FILEPATH \"The Make program used in SCORPIO build\" FORCE)\n")

# The settings file lives in the top-level build directory
set(SCORPIO_BUILD_CACHE_FILE "${CMAKE_BINARY_DIR}/SCORPIO_CMakeCache.txt")
message(STATUS "Caching compiler settings in ${SCORPIO_BUILD_CACHE_FILE}...")

# (Re)create the settings file at configure time. Every expansion is quoted:
# an unquoted ${VAR} is split into several arguments at each ';' and file()
# concatenates its content arguments, so a ';' inside a compiler path or flag
# string would otherwise be silently dropped from the generated file.
file(WRITE "${SCORPIO_BUILD_CACHE_FILE}"
  "${SCORPIO_BUILD_CACHE_FILE_BANNER}"
  "${SCORPIO_BUILD_CMAKE_CXX_COMPILER}"
  "${SCORPIO_BUILD_CMAKE_CXX_FLAGS}"
  "${SCORPIO_BUILD_CMAKE_C_COMPILER}"
  "${SCORPIO_BUILD_CMAKE_C_FLAGS}"
  "${SCORPIO_BUILD_CMAKE_EXE_LINKER_FLAGS}"
  "${SCORPIO_BUILD_CMAKE_LINKER}"
  "${SCORPIO_BUILD_CMAKE_MAKE_PROGRAM}")
23+
124
#==============================================================================
225
# FIND EXTERNAL LIBRARIES
326
#==============================================================================
@@ -27,6 +50,12 @@ add_subdirectory (clib)
2750

2851
# Add the Fortran library
2952
if (PIO_ENABLE_FORTRAN)
53+
# Saving Fortran build settings in cache file
# (SCORPIO_BUILD_CACHE_FILE is set earlier in this CMakeLists.txt)
set(SCORPIO_BUILD_CMAKE_Fortran_COMPILER "set(CMAKE_Fortran_COMPILER \"${CMAKE_Fortran_COMPILER}\" CACHE FILEPATH \"The Fortran Compiler used in SCORPIO build\" FORCE)\n")
set(SCORPIO_BUILD_CMAKE_Fortran_FLAGS "set(CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\" CACHE STRING \"The Fortran Compiler flags used in SCORPIO build\" FORCE)\n")
# Quote the expansions so that a ';' inside the compiler path or flags is
# written verbatim instead of being consumed as a list separator.
file(APPEND "${SCORPIO_BUILD_CACHE_FILE}"
  "${SCORPIO_BUILD_CMAKE_Fortran_COMPILER}"
  "${SCORPIO_BUILD_CMAKE_Fortran_FLAGS}")
58+
3059
message(STATUS "Enabling the Fortran interface...")
3160

3261
# Check if prereqs for new Fortran interface is available

src/clib/CMakeLists.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ add_library (pioc ${pio_api_src}
4141
pioc.cpp pioc_sc.cpp pio_spmd.cpp pio_rearrange.cpp pio_nc4.cpp bget.cpp
4242
pio_nc.cpp pio_put_nc.cpp pio_get_nc.cpp pio_getput_int.cpp pio_msg.cpp pio_varm.cpp
4343
pio_darray.cpp pio_darray_int.cpp spio_hash.cpp pio_sdecomps_regex.cpp spio_io_summary.cpp
44-
spio_ltimer.cpp spio_serializer.cpp spio_file_mvcache.cpp spio_tracer.cpp spio_rearrange_any.cpp)
44+
spio_ltimer.cpp spio_serializer.cpp spio_file_mvcache.cpp
45+
spio_tracer.cpp spio_tracer_mdata.cpp spio_tracer_decomp.cpp
46+
spio_rearrange_any.cpp)
4547

4648
#==============================================================================
4749
# FIND EXTERNAL LIBRARIES/DEPENDENCIES
@@ -381,9 +383,17 @@ endif ()
381383
# Translate the user-facing tracing options into ENABLE_API_TRACING and
# ENABLE_API_VAR_TRACING (each set to 1 or 0). Variable tracing is only
# enabled when API tracing itself is enabled.
# NOTE(review): these values are presumably substituted into a generated
# config header - confirm against the configure_file() call in this file.
if (PIO_ENABLE_API_TRACING)
  message(STATUS "Enabling API tracing")
  set(ENABLE_API_TRACING 1)
  if (PIO_ENABLE_API_VAR_TRACING)
    message(STATUS "Enabling variable tracing in APIs")
    set(ENABLE_API_VAR_TRACING 1)
  else ()
    message(STATUS "Disabling variable tracing in APIs (default)")
    set(ENABLE_API_VAR_TRACING 0)
  endif ()
else ()
  message(STATUS "API tracing disabled (default)")
  set(ENABLE_API_TRACING 0)
  # Variable tracing cannot work without API tracing; tell the user instead
  # of silently ignoring the request.
  if (PIO_ENABLE_API_VAR_TRACING)
    message(WARNING "PIO_ENABLE_API_VAR_TRACING is ON but PIO_ENABLE_API_TRACING is OFF, variable tracing will be disabled")
  endif ()
  set(ENABLE_API_VAR_TRACING 0)
endif ()
388398

389399
# The MPI library detection was done in the top level
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
This file contains some basic information on how to generate SCORPIO API traces and how to use the trace files to replay the I/O data patterns.
2+
3+
The following API trace files are generated (when SCORPIO API tracing is enabled),
4+
5+
* spio_trace_log_*.log : Generated for each I/O system and contains the API trace
6+
* spio_trace_mdata_*.log : Generated for each I/O system and contains some meta-data regarding the API trace
7+
* spio_trace_decomp_*.nc : Generated for each I/O system and contains the I/O decompositions used by the I/O system.
8+
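* SCORPIO_CMakeCache.txt : Contains info on the SCORPIO build (compilers, compiler/linker flags, make program). This file is written to the SCORPIO build directory when SCORPIO is configured with CMake, not when the application runs.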
9+
10+
The above trace files can be used to generate a replay program, spio_replay.exe, that will replay the I/O patterns captured by the trace files.
11+
12+
Generating API trace
13+
---------------------
14+
1) Configure SCORPIO to generate API traces
15+
16+
cmake -DPIO_ENABLE_API_TRACING:BOOL=ON ...
17+
18+
2) Build/Install SCORPIO, Build user app using SCORPIO
19+
3) Run the user app. The API trace files will be output in the run directory.
20+
21+
Creating replay program (generation, build) from the API trace
22+
---------------------------------------------------------------
23+
1) Use the spio_replay.py script to generate and build the replay tool, spio_replay.exe, from the API trace files
24+
25+
Assuming the SCORPIO source is in ~/scorpio, the build/run directory is ~/scorpio_build (the trace logs are here), and the SCORPIO install directory is ~/scorpio_install:
26+
27+
----------------------------------------------------------------------------
28+
$ ~/scorpio/tools/replay/spio_replay.py --help
29+
=================================================
30+
SCORPIO REPLAY TOOL GENERATOR
31+
=================================================
32+
33+
usage: spio_replay.py [-h] [--scorpio-src-dir scorpio_src_dir] [--scorpio-build-dir scorpio_build_dir] [--scorpio-install-dir scorpio_install_dir] [--trace-log-dir trace_log_dir]
34+
[--replay-tool-dir replay_tool_dir] [-v] [-q]
35+
36+
Script to generate and build spio_replay.exe from SCORPIO trace logs. This script reads the SCORPIO trace logs & trace meta data logs to generate source files to replay the I/O from the trace. These
37+
sources are built, using the same configuration used to build SCORPIO, to create spio_replay.exe . At runtime spio_replay.exe requires the I/O decomposition files, spio_trace_decomp_*.nc, to replay the
38+
I/O data pattern. The script requires the I/O trace logs, spio_trace_log*.log, and the I/O trace meta-data logs, spio_trace_mdata*.log, to generate the executable, spio_replay.exe, that can be used to
39+
replay the I/O data patterns in an application (e.g. E3SM) run
40+
41+
options:
42+
-h, --help show this help message and exit
43+
--scorpio-src-dir scorpio_src_dir
44+
Source directory for the SCORPIO library (default: /home/jayesh/scorpio_build3/tmp)
45+
--scorpio-build-dir scorpio_build_dir
46+
Build directory for the SCORPIO library (default: /home/jayesh/scorpio_build3/tmp)
47+
--scorpio-install-dir scorpio_install_dir
48+
Install directory for the SCORPIO library (default: /home/jayesh/scorpio_build3/tmp)
49+
--trace-log-dir trace_log_dir
50+
Directory containing SCORPIO trace log files (default: /home/jayesh/scorpio_build3/tmp)
51+
--replay-tool-dir replay_tool_dir
52+
Directory to store replay tool source/exe (default: /home/jayesh/scorpio_build3/tmp)
53+
-v, --verbose Turn on verbose mode (default: False)
54+
-q, --quiet Turn on quiet mode (no output to stdout) (default: False)
55+
56+
$ mkdir scorpio_replay_tool_build
57+
$ cd scorpio_replay_tool_build
58+
$ ~/scorpio/tools/replay/spio_replay.py --scorpio-src-dir ~/scorpio --scorpio-build-dir ~/scorpio_build --scorpio-install-dir ~/scorpio_install --trace-log-dir ~/scorpio_build --verbose
59+
----------------------------------------------------------------------------
60+
61+
The above script (spio_replay.py) generates some template source files and builds the SCORPIO replay tool, spio_replay.exe.
62+
63+
Running the replay program
64+
---------------------------
65+
* Run the replay tool, spio_replay.exe, using the same number of MPI processes as the application run that generated the trace files
66+

src/clib/api/spio_dim_api.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ int PIOc_inq_dimid(int ncid, const char *name, int *idp)
3333
ret = PIOc_inq_dimid_impl(ncid, name, idp);
3434

3535
#if SPIO_ENABLE_API_TRACING
36+
tr.set_dim_id(ncid, *idp);
3637
tr.add_rval("*idp", idp);
3738
#endif
3839
return ret;
@@ -92,6 +93,7 @@ int PIOc_def_dim(int ncid, const char *name, PIO_Offset len, int *idp)
9293
ret = PIOc_def_dim_impl(ncid, name, len, idp);
9394

9495
#if SPIO_ENABLE_API_TRACING
96+
tr.set_dim_id(ncid, *idp);
9597
tr.add_rval("*idp", idp);
9698
#endif
9799
return ret;

src/clib/api/spio_get_att_api.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int PIOc_get_att_schar(int ncid, int varid, const char *name, signed char *ip)
4040
SPIO_Util::Tracer::Timed_func_call_tracer tr("PIOc_get_att_schar");
4141
tr.set_file_id(ncid).add_arg("ncid", ncid).
4242
add_arg("varid", varid).add_arg("*name", name).
43-
add_arg("*ip", ip).flush();
43+
add_arg("*ip", static_cast<void *>(ip)).flush();
4444
#endif
4545
ret = PIOc_get_att_schar_impl(ncid, varid, name, ip);
4646

@@ -142,9 +142,14 @@ int PIOc_get_att_uchar(int ncid, int varid, const char *name, unsigned char *ip)
142142
SPIO_Util::Tracer::Timed_func_call_tracer tr("PIOc_get_att_uchar");
143143
tr.set_file_id(ncid).add_arg("ncid", ncid).
144144
add_arg("varid", varid).add_arg("*name", name).
145-
add_arg("*ip", ip).flush();
145+
add_arg("*ip", static_cast<void *>(ip)).flush();
146146
#endif
147-
return PIOc_get_att_uchar_impl(ncid, varid, name, ip);
147+
ret = PIOc_get_att_uchar_impl(ncid, varid, name, ip);
148+
149+
#if SPIO_ENABLE_API_TRACING
150+
tr.add_rval("*ip", ip);
151+
#endif
152+
return ret;
148153
}
149154

150155
int PIOc_get_att_ushort(int ncid, int varid, const char *name, unsigned short *ip)

src/clib/api/spio_get_var1_api.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ int PIOc_get_var1_text(int ncid, int varid, const PIO_Offset *index, char *buf)
2828
#endif
2929
ret = PIOc_get_var1_text_impl(ncid, varid, index, buf);
3030

31-
#if SPIO_ENABLE_API_TRACING
31+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
3232
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
3333
#endif
3434
return ret;
@@ -45,7 +45,7 @@ int PIOc_get_var1_schar(int ncid, int varid, const PIO_Offset *index, signed cha
4545
#endif
4646
ret = PIOc_get_var1_schar_impl(ncid, varid, index, buf);
4747

48-
#if SPIO_ENABLE_API_TRACING
48+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
4949
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
5050
#endif
5151
return ret;
@@ -62,7 +62,7 @@ int PIOc_get_var1_short(int ncid, int varid, const PIO_Offset *index, short *buf
6262
#endif
6363
ret = PIOc_get_var1_short_impl(ncid, varid, index, buf);
6464

65-
#if SPIO_ENABLE_API_TRACING
65+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
6666
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
6767
#endif
6868
return ret;
@@ -79,7 +79,7 @@ int PIOc_get_var1_int(int ncid, int varid, const PIO_Offset *index, int *buf)
7979
#endif
8080
ret = PIOc_get_var1_int_impl(ncid, varid, index, buf);
8181

82-
#if SPIO_ENABLE_API_TRACING
82+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
8383
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
8484
#endif
8585
return ret;
@@ -96,7 +96,7 @@ int PIOc_get_var1_long(int ncid, int varid, const PIO_Offset *index, long *buf)
9696
#endif
9797
ret = PIOc_get_var1_long_impl(ncid, varid, index, buf);
9898

99-
#if SPIO_ENABLE_API_TRACING
99+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
100100
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
101101
#endif
102102
return ret;
@@ -113,7 +113,7 @@ int PIOc_get_var1_float(int ncid, int varid, const PIO_Offset *index, float *buf
113113
#endif
114114
ret = PIOc_get_var1_float_impl(ncid, varid, index, buf);
115115

116-
#if SPIO_ENABLE_API_TRACING
116+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
117117
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
118118
#endif
119119
return ret;
@@ -130,7 +130,7 @@ int PIOc_get_var1_double(int ncid, int varid, const PIO_Offset *index, double *b
130130
#endif
131131
ret = PIOc_get_var1_double_impl(ncid, varid, index, buf);
132132

133-
#if SPIO_ENABLE_API_TRACING
133+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
134134
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
135135
#endif
136136
return ret;
@@ -147,7 +147,7 @@ int PIOc_get_var1_uchar(int ncid, int varid, const PIO_Offset *index, unsigned c
147147
#endif
148148
ret = PIOc_get_var1_uchar_impl(ncid, varid, index, buf);
149149

150-
#if SPIO_ENABLE_API_TRACING
150+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
151151
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
152152
#endif
153153
return ret;
@@ -164,7 +164,7 @@ int PIOc_get_var1_ushort(int ncid, int varid, const PIO_Offset *index, unsigned
164164
#endif
165165
ret = PIOc_get_var1_ushort_impl(ncid, varid, index, buf);
166166

167-
#if SPIO_ENABLE_API_TRACING
167+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
168168
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
169169
#endif
170170
return ret;
@@ -181,7 +181,7 @@ int PIOc_get_var1_uint(int ncid, int varid, const PIO_Offset *index, unsigned in
181181
#endif
182182
ret = PIOc_get_var1_uint_impl(ncid, varid, index, buf);
183183

184-
#if SPIO_ENABLE_API_TRACING
184+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
185185
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
186186
#endif
187187
return ret;
@@ -198,7 +198,7 @@ int PIOc_get_var1_longlong(int ncid, int varid, const PIO_Offset *index, long lo
198198
#endif
199199
ret = PIOc_get_var1_longlong_impl(ncid, varid, index, buf);
200200

201-
#if SPIO_ENABLE_API_TRACING
201+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
202202
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
203203
#endif
204204
return ret;
@@ -215,7 +215,7 @@ int PIOc_get_var1_ulonglong(int ncid, int varid, const PIO_Offset *index, unsign
215215
#endif
216216
ret = PIOc_get_var1_ulonglong_impl(ncid, varid, index, buf);
217217

218-
#if SPIO_ENABLE_API_TRACING
218+
#if (SPIO_ENABLE_API_TRACING) && (SPIO_ENABLE_API_VAR_TRACING)
219219
tr.add_rval("*buf", buf, PIO_Util::PIO_Get_Utils::get_vslice_sz_from_sidx(ncid, varid, index));
220220
#endif
221221
return ret;

0 commit comments

Comments
 (0)