Skip to content

Commit b178f38

Browse files
committed
Add chunking and compression driver
1 parent 8f583ff commit b178f38

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+16219
-0
lines changed

configure.ac

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ AH_TEMPLATE([RELAX_COORD_BOUND], [Define if relaxed coordinate check is e
141141
AH_TEMPLATE([ENABLE_LARGE_SINGLE_REQ], [Define if to enable large single MPI-IO request])
142142
AH_TEMPLATE([ENABLE_NULL_BYTE_HEADER_PADDING], [Define if to enable strict null-byte padding in file header])
143143
AH_TEMPLATE([ENABLE_BURST_BUFFER], [Define if to enable burst buffer feature])
144+
AH_TEMPLATE([ENABLE_COMPRESSION], [Define if to enable compression feature])
145+
AH_TEMPLATE([ENABLE_ZLIB], [Define if to enable zlib compression method])
146+
AH_TEMPLATE([ENABLE_SZ], [Define if to enable sz compression method])
144147
AH_TEMPLATE([PNETCDF_PROFILING], [Define if to enable PnetCDF internal performance profiling])
145148
AH_TEMPLATE([ENABLE_THREAD_SAFE], [Define if to enable thread-safe capability])
146149
AH_TEMPLATE([ENABLE_REQ_AGGREGATION], [Define if able to support request aggregation in nonblocking routines])
@@ -2210,6 +2213,129 @@ fi
22102213
AC_SUBST(ENABLE_BURST_BUFFER)
22112214
AM_CONDITIONAL(ENABLE_BURST_BUFFER, [test x$enable_bbdriver = xyes])
22122215

2216+
AC_ARG_ENABLE([compression],
2217+
[AS_HELP_STRING([--enable-compression],
2218+
[Enable chunked compression driver support. @<:@default: disabled@:>@])],
2219+
[enable_compression=${enableval}], [enable_compression=no]
2220+
)
2221+
2222+
ENABLE_COMPRESSION=0
2223+
if test "x$enable_compression" = "xyes" ; then
2224+
AC_DEFINE(ENABLE_COMPRESSION)
2225+
ENABLE_COMPRESSION=1
2226+
fi
2227+
AC_SUBST(ENABLE_COMPRESSION)
2228+
AM_CONDITIONAL(ENABLE_COMPRESSION, [test x$enable_compression = xyes])
2229+
2230+
AC_ARG_ENABLE([zlib],
2231+
[AS_HELP_STRING([--enable-zlib],
2232+
[Enable zlib compression method support. @<:@default: disabled@:>@])],
2233+
[enable_zlib=${enableval}], [enable_zlib=no]
2234+
)
2235+
2236+
ENABLE_ZLIB=0
2237+
if test "x$enable_zlib" = "xyes" ; then
2238+
AC_DEFINE(ENABLE_ZLIB)
2239+
ENABLE_ZLIB=1
2240+
fi
2241+
AC_SUBST(ENABLE_ZLIB)
2242+
AM_CONDITIONAL(ENABLE_ZLIB, [test x$enable_zlib = xyes])
2243+
2244+
if test "x$enable_zlib" = "xyes" ; then
2245+
ZLIB_INSTALL=""
2246+
AC_ARG_WITH(zlib,
2247+
[AS_HELP_STRING([--with-zlib=/path/to/implementation],
2248+
[installation prefix for zlib implementation])],
2249+
if test "x${withval}" = xyes; then
2250+
AC_MSG_ERROR(--with-zlib is set but the value is NULL)
2251+
else
2252+
ZLIB_INSTALL=${withval}
2253+
fi
2254+
)
2255+
2256+
dnl Add the user-supplied zlib prefix to the preprocessor and linker flags.
dnl Values are appended with the portable VAR="${VAR} ..." form because
dnl configure is run by /bin/sh and "+=" is a bash/ksh extension.
if test "x${ZLIB_INSTALL}" != x ; then
2257+
CPPFLAGS="${CPPFLAGS} -I${ZLIB_INSTALL}/include"
2258+
LDFLAGS="${LDFLAGS} -L${ZLIB_INSTALL}/lib"
2259+
LIBS="${LIBS} -lz"
2260+
fi
2261+
2262+
LIBS="${LIBS} -lm -ldl"
2263+
2264+
dnl Probe for libz, then for its header. The search and header macros print
dnl their own "checking ..." lines, so no separate message macro is issued
dnl here (one without a matching result call would garble configure output).
have_zlib=no
2266+
AC_SEARCH_LIBS([deflate], [z], [have_zlib=yes], [have_zlib=no])
2267+
if test "x${have_zlib}" = xyes; then
2268+
AC_CHECK_HEADERS([zlib.h], [], [have_zlib=no])
2269+
fi
2270+
2271+
if test "x${have_zlib}" = xno; then
2272+
AC_MSG_ERROR([
2273+
------------------------------------------------------------
2274+
The ZLIB library and header file are required to build
2275+
PnetCDF with ZLIB compression support. Use option
2276+
--with-zlib=/path/to/implementation
2277+
to specify the location of ZLIB build.
2278+
Stopping ...
2279+
Check 'config.log' for more information.
2280+
------------------------------------------------------------])
2281+
fi
2282+
fi
2283+
2284+
AC_ARG_ENABLE([sz],
2285+
[AS_HELP_STRING([--enable-sz],
2286+
[Enable sz compression method support. @<:@default: disabled@:>@])],
2287+
[enable_sz=${enableval}], [enable_sz=no]
2288+
)
2289+
2290+
ENABLE_SZ=0
2291+
if test "x$enable_sz" = "xyes" ; then
2292+
AC_DEFINE(ENABLE_SZ)
2293+
ENABLE_SZ=1
2294+
fi
2295+
AC_SUBST(ENABLE_SZ)
2296+
AM_CONDITIONAL(ENABLE_SZ, [test x$enable_sz = xyes])
2297+
2298+
2299+
if test "x$enable_sz" = "xyes" ; then
2300+
SZ_INSTALL=""
2301+
AC_ARG_WITH(sz,
2302+
[AS_HELP_STRING([--with-sz=/path/to/implementation],
2303+
[installation prefix for sz implementation])],
2304+
if test "x${withval}" = xyes; then
2305+
AC_MSG_ERROR(--with-sz is set but the value is NULL)
2306+
else
2307+
SZ_INSTALL=${withval}
2308+
fi
2309+
)
2310+
2311+
dnl Add the user-supplied SZ prefix to the preprocessor and linker flags.
dnl Values are appended with the portable VAR="${VAR} ..." form because
dnl configure is run by /bin/sh and "+=" is a bash/ksh extension.
if test "x${SZ_INSTALL}" != x ; then
2312+
CPPFLAGS="${CPPFLAGS} -I${SZ_INSTALL}/include"
2313+
LDFLAGS="${LDFLAGS} -L${SZ_INSTALL}/lib"
2314+
LIBS="${LIBS} -lSZ -lzstd"
2315+
fi
2316+
2317+
LIBS="${LIBS} -lm -ldl"
2318+
2319+
dnl Probe for the SZ library itself, then for its header. The library check
dnl must look for an SZ symbol in libSZ; searching for deflate in libz only
dnl proves that zlib is present and says nothing about SZ.
dnl NOTE(review): SZ_Init is taken to be the public initialization entry
dnl point declared in sz.h -- confirm against the SZ version being targeted.
have_sz=no
2321+
AC_SEARCH_LIBS([SZ_Init], [SZ], [have_sz=yes], [have_sz=no])
2322+
if test "x${have_sz}" = xyes; then
2323+
AC_CHECK_HEADERS([sz.h], [], [have_sz=no])
2324+
fi
2325+
2326+
if test "x${have_sz}" = xno; then
2327+
AC_MSG_ERROR([
2328+
------------------------------------------------------------
2329+
The SZ library and header file are required to build
2330+
PnetCDF with SZ compression support. Use option
2331+
--with-sz=/path/to/implementation
2332+
to specify the location of SZ build.
2333+
Stopping ...
2334+
Check 'config.log' for more information.
2335+
------------------------------------------------------------])
2336+
fi
2337+
fi
2338+
22132339
ADIOS_INSTALL=""
22142340
AC_ARG_WITH(adios,
22152341
[AS_HELP_STRING([--with-adios@<:@=DIR@:>@],

src/dispatchers/file.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,11 @@ ncmpi_create(MPI_Comm comm,
438438
if (enable_bb_driver)
439439
driver = ncbbio_inq_driver();
440440
else
441+
#endif
442+
#ifdef ENABLE_COMPRESSION
443+
if (enable_zip_driver)
444+
driver = nczipio_inq_driver();
445+
else
441446
#endif
442447
/* default is the driver built on top of MPI-IO */
443448
driver = ncmpio_inq_driver();
@@ -692,6 +697,11 @@ ncmpi_open(MPI_Comm comm,
692697
if (enable_bb_driver)
693698
driver = ncbbio_inq_driver();
694699
else
700+
#endif
701+
#ifdef ENABLE_COMPRESSION
702+
if (enable_zip_driver)
703+
driver = nczipio_inq_driver();
704+
else
695705
#endif
696706
{
697707
/* ncmpio driver */

src/drivers/Makefile.am

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ if ENABLE_ADIOS
2424
SUBDIRS += ncadios
2525
endif
2626

27+
if ENABLE_COMPRESSION
28+
SUBDIRS += nczipio
29+
endif
30+
2731
# DIST_SUBDIRS is set by hand, so it must name every directory a conditional
# can add to SUBDIRS; otherwise "make dist" leaves that directory out.
# NOTE(review): assumes configure generates src/drivers/nczipio/Makefile
# (i.e. it is listed in AC_CONFIG_FILES) -- confirm.
DIST_SUBDIRS = include common ncmpio ncfoo ncbbio nc4io ncadios nczipio
2832

2933
# For VPATH build (parallel build), try delete all sub-directories
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Note for Developers
2+
3+
### Table of contents
4+
- [Future Work]
5+
- [Internal global attributes]
6+
- [Anchor variable (one per variable with chunking enabled)]
7+
- [Reference table]
8+
- [Chunks]
9+
- [Requirement for compressed variables]
10+
11+
---
12+
13+
## Internal global attributes:
14+
* Number of chunked variables
15+
16+
## Anchor variable (one per variable with chunking enabled):
17+
* A scalar variable
18+
* Data type is the same as user defined
19+
* Internal attributes
20+
+ Dimension IDs are saved as an attribute of an array of integer type
21+
+ Number of dimensions is saved as an internal attribute
22+
+ An attribute to tell whether it is a fixed-size or record variable
23+
+ An attribute offset pointer to reference table
24+
* For fixed-size variable, it is a scalar
25+
* For a record variable, it is an array of 8-byte integers, one for each record
26+
* This array can be allocated in multiple of 16 for example
27+
* Need an integer for allocated size, e.g. multiple of 16
28+
* Need an integer for size (true number of records written)
29+
+ An attribute for chunk sizes, an integer array
30+
+ An attribute for the compression algorithm
31+
+ An attribute for the compression level
32+
* If a variable is missing these internal attributes, it is a traditional variable
33+
34+
## Reference table:
35+
* An array stores offsets of individual chunks
36+
* Not a NetCDF variable. But we use the CDF5 format specification to define it
37+
+ TODO: give it a formal spec in BNF grammar
38+
* For a fixed-size variable, it is a 1D array of size equal to the number of chunks
39+
* This table is loaded into memory when calling ncmpi_inq_varid
40+
* For blocking API, it is sync-ed and written to file by root
41+
+ TODO: in future, it can be written by multiple ranks in parallel
42+
* For nonblocking API, multiple tables are written by multiple ranks in parallel
43+
44+
## Chunks:
45+
* Chunks are not NetCDF variables
46+
+ TODO: give it a formal spec in BNF grammar?
47+
* Chunks are stored in space between NetCDF variables, i.e. padding areas in files
48+
* Data is type-converted and byte-swapped before compression
49+
* In principle, chunks should be stored in file contiguously with each other,
50+
for all variables. But they are not required to be stored contiguously.
51+
* The storage order of chunks is in row major
52+
53+
## Requirement for compressed variables:
54+
* Collective I/O only (this is the same required by HDF5)
55+
* Must be chunked (same as HDF5)
56+
57+
58+
## Future Work
59+
* Reuse metadata across variables
60+
- Variables from the same simulation space may have the same access pattern.
61+
- Instead of generating variable metadata and index tables separately, we can
62+
share information across variables.
63+
- Chunk size and chunk ownership info can be reused.
64+
* Data sieving
65+
- When rewriting to a chunk, we don't need to read the background if it is
66+
fully overwritten.
67+
- Need an efficient way to determine whether a chunk is fully rewritten.
68+
- It may be infeasible due to communication and computation cost.
69+
- HDF5 approximates this by checking whether the owner fully rewrites the chunk.
70+
* Reuse metadata across records
71+
- I/O patterns across time steps are likely the same.
72+
- If we detect the same I/O pattern as in the previous record, we can skip sending the metadata.
73+
- The MPI datatype created for the previous time step can also be reused.
74+
---

src/drivers/nczipio/Makefile.am

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#
2+
# Copyright (C) 2012, Northwestern University and Argonne National Laboratory
3+
# See COPYRIGHT notice in top-level directory.
4+
#
5+
# $Id: Makefile.am 3283 2017-07-30 21:10:11Z wkliao $
6+
#
7+
# @configure_input@
8+
9+
SUFFIXES = .a .o .c .m4 .h
10+
11+
AM_CPPFLAGS = -I${top_srcdir}/src/include
12+
AM_CPPFLAGS += -I${top_builddir}/src/include
13+
AM_CPPFLAGS += -I${top_srcdir}/src/drivers/include
14+
AM_CPPFLAGS += -I${top_builddir}/src/drivers/include
15+
16+
if PNETCDF_DEBUG
17+
AM_CPPFLAGS += -DPNETCDF_DEBUG
18+
endif
19+
20+
if PNETCDF_PROFILING
21+
AM_CPPFLAGS += -DPNETCDF_PROFILING
22+
endif
23+
24+
noinst_LTLIBRARIES = libnczipio.la
25+
26+
M4FLAGS += -I${top_srcdir}/m4
27+
if ENABLE_ERANGE_FILL
28+
M4FLAGS += -DERANGE_FILL
29+
endif
30+
31+
M4_SRCS = nczipioi_profile.m4 \
32+
nczipioi_convert.m4
33+
34+
M4H_SRCS = nczipioi_profile.m4h
35+
36+
H_SRCS = nczipio_driver.h
37+
38+
# C sources that are always part of libnczipio.la. The ENABLE_ZLIB and
# ENABLE_SZ conditionals below append the method-specific files.
# Each file is listed exactly once.
C_SRCS = nczipio_attr.c \
39+
nczipio_dim.c \
40+
nczipio_driver.c \
41+
nczipio_file.c \
42+
nczipio_var.c \
43+
nczipio_internal.c \
44+
nczipioi_util.c \
45+
nczipioi_put_var.c \
46+
nczipioi_get_var.c \
47+
nczipioi_put_varn.c \
48+
nczipioi_get_varn.c \
49+
nczipioi_iput_var.c \
50+
nczipioi_iget_var.c \
51+
nczipioi_iput.c \
52+
nczipioi_iget.c \
53+
nczipioi_nonblocking.c \
54+
nczipioi_cache.c \
55+
nczipioi_chunk.c \
56+
nczipioi_chunk_size.c \
57+
nczipioi_chunk_owner.c \
58+
nczipioi_var_init.c \
59+
nczipioi_var_resize.c \
60+
nczipioi_var_wr.c \
61+
nczipioi_var_rd.c \
62+
nczipioi_lists.c \
64+
nczipioi_wait.c \
65+
nczip_dummy.c
66+
67+
if ENABLE_ZLIB
68+
C_SRCS += nczip_zlib.c
69+
endif
70+
71+
if ENABLE_SZ
72+
C_SRCS += nczip_sz.c
73+
endif
74+
75+
$(M4_SRCS:.m4=.c): Makefile
76+
$(M4H_SRCS:.m4h=.h): Makefile
77+
78+
.m4.c:
79+
$(M4) $(AM_M4FLAGS) $(M4FLAGS) $< >$@
80+
81+
.m4h.h:
82+
$(M4) $(AM_M4FLAGS) $(M4FLAGS) $< >$@
83+
84+
libnczipio_la_SOURCES = $(C_SRCS) $(H_SRCS)
85+
nodist_libnczipio_la_SOURCES = $(M4_SRCS:.m4=.c) $(M4H_SRCS:.m4h=.h)
86+
87+
# automake says "... BUILT_SOURCES is honored only by 'make all', 'make check',
88+
# and 'make install'. This means you cannot build a specific target (e.g.,
89+
# 'make target') in a clean tree if it depends on a built source."
90+
BUILT_SOURCES = $(M4_SRCS:.m4=.c) $(M4H_SRCS:.m4h=.h)
91+
92+
CLEANFILES = $(M4_SRCS:.m4=.c) $(M4H_SRCS:.m4h=.h) core core.* *.gcda *.gcno *.gcov gmon.out
93+
94+
# Ship the m4 templates in the tarball; the .c/.h files generated from them
# are listed under nodist_libnczipio_la_SOURCES and are not distributed.
EXTRA_DIST = $(M4_SRCS) $(M4H_SRCS)
95+
96+
tests-local: all
97+

0 commit comments

Comments
 (0)