Skip to content

Commit

Permalink
Merge pull request #131 from Parallel-NetCDF/large_req
Browse files Browse the repository at this point in the history
Support large single requests made by a process
  • Loading branch information
wkliao authored Mar 14, 2024
2 parents e6a6177 + ca0c3f4 commit 1f03432
Show file tree
Hide file tree
Showing 74 changed files with 1,505 additions and 495 deletions.
3 changes: 3 additions & 0 deletions benchmarks/C/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/FLASH-IO/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
41 changes: 21 additions & 20 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,13 @@ AH_TEMPLATE([HDF5_VER_GE_1_10_4], [Define if HDF5 version is at least 1.10
AH_TEMPLATE([NETCDF_GE_4_5_0], [Define if NetCDF version is at least 4.5.0])
AH_TEMPLATE([PNC_MALLOC_TRACE], [Define if to enable malloc tracing])
AH_TEMPLATE([RELAX_COORD_BOUND], [Define if relaxed coordinate check is enabled])
AH_TEMPLATE([ENABLE_LARGE_SINGLE_REQ], [Define if to enable large single MPI-IO request])
AH_TEMPLATE([ENABLE_NULL_BYTE_HEADER_PADDING], [Define if to enable strict null-byte padding in file header])
AH_TEMPLATE([ENABLE_BURST_BUFFER], [Define if to enable burst buffer feature])
AH_TEMPLATE([PNETCDF_PROFILING], [Define if to enable PnetCDF internal performance profiling])
AH_TEMPLATE([ENABLE_THREAD_SAFE], [Define if to enable thread-safe capability])
AH_TEMPLATE([ENABLE_REQ_AGGREGATION], [Define if able to support request aggregation in nonblocking routines])
dnl AH_TEMPLATE([HAVE_MPI_COUNT], [Define if type MPI_Count is defined])
AH_TEMPLATE([HAVE_MPI_LARGE_COUNT], [Define if required MPI APIs have arguments of type MPI_Count])

AH_TOP([#ifndef _CONFIG_H
#define _CONFIG_H])
Expand Down Expand Up @@ -1315,6 +1315,26 @@ dnl MPI_Type_create_struct \
dnl MPI_Type_create_resized \
dnl MPI_Type_get_extent])

dnl MPI_Count was first introduced in MPI 3.0. Check MPI functions that make
dnl use of MPI_Count.
have_mpi_large_count_apis=yes
AC_CHECK_FUNCS([MPI_Type_create_subarray_c \
MPI_Type_contiguous_c \
MPI_Type_create_hvector_c \
MPI_Type_create_struct_c \
MPI_Type_create_hindexed_c \
MPI_Type_vector_c \
MPI_Pack_c \
MPI_Unpack_c \
MPI_Type_get_true_extent_x \
MPI_Type_size_x], [], [have_mpi_large_count_apis=no])
# If one of the above APIs is not available, have_mpi_large_count_apis will be
# set to no
UD_MSG_DEBUG([have_mpi_large_count_apis=$have_mpi_large_count_apis])
if test "x$have_mpi_large_count_apis" = "xyes" ; then
AC_DEFINE(HAVE_MPI_LARGE_COUNT, 1)
fi

dnl Check presence of MPI COMBINERS. These are of type int.
dnl These are introduced in MPI 2.0. As PnetCDF requires an MPI library that
dnl supports MPI-IO and MPI-IO was first introduced in MPI 2.0, checking these
Expand Down Expand Up @@ -2256,21 +2276,6 @@ AC_SUBST(LATEX)
AC_SUBST(DVIPDF)
AM_CONDITIONAL([HAS_LATEX], [test "x$has_latex" = xyes])

AC_ARG_ENABLE([large-single-req],
[AS_HELP_STRING([--enable-large-single-req],
[Enable large (> 2 GiB) single request in individual MPI-IO
calls. Note some MPI-IO libraries may not support this.
@<:@default: disabled@:>@])],
[large_single_req=${enableval}], [large_single_req=no]
)
ENABLE_LARGE_SINGLE_REQ=0
if test "x${large_single_req}" = xyes ; then
ENABLE_LARGE_SINGLE_REQ=1
AC_DEFINE(ENABLE_LARGE_SINGLE_REQ)
fi
AM_CONDITIONAL(ENABLE_LARGE_SINGLE_REQ, [test x$large_single_req = xyes])
AC_SUBST(ENABLE_LARGE_SINGLE_REQ)

AC_ARG_ENABLE([large-file-test],
[AS_HELP_STRING([--enable-large-file-test],
[Enable testing for large (>4GB) file/variable I/O. Note
Expand Down Expand Up @@ -2722,10 +2727,6 @@ if test "x${enable_subfiling}" = xyes; then
echo "\
Subfiling support - enabled"
fi
if test "x${large_single_req}" = xyes ; then
echo "\
Allow large (> 2GiB) single MPI-IO requests - enabled"
fi
if test "x${thread_safe}" = xyes ; then
echo "\
Thread-safe capability - enabled"
Expand Down
3 changes: 3 additions & 0 deletions examples/C/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions examples/CXX/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions examples/F77/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions examples/F90/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions examples/adios/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
3 changes: 3 additions & 0 deletions examples/burst_buffer/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
# echo "---- exec=$i"
for j in ${safe_modes} ; do
Expand Down
3 changes: 3 additions & 0 deletions examples/tutorial/parallel_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ else
safe_modes="0"
fi

# prevent a PNETCDF_HINTS setting in the user's environment from interfering
unset PNETCDF_HINTS

for i in ${check_PROGRAMS} ; do
for j in ${safe_modes} ; do
if test "$j" = 1 ; then # test only in safe mode
Expand Down
18 changes: 17 additions & 1 deletion sneak_peek.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,26 @@ This is essentially a placeholder for the next release note ...
```
ncmpi_put_vara_float_all(ncid, varid, start, count, buf);
```
+ PnetCDF now allows a single read/write request from a process of size
larger than 2 GiB. Such large requests are now passed down to the MPI-IO
library. This change is because MPI 3.0 introduces the large count feature,
including MPI_Count data type, MPI_XXX_c and MPI_XXX_x APIs that use 8-byte
integer type to enable large MPI operations. As many MPI libraries today
have implemented this feature, PnetCDF can now take advantage of it to
support large single requests. Because of this change, configure option
`--enable-large-single-req` has been removed.
See [PR #131](https://github.com/Parallel-NetCDF/PnetCDF/pull/131)

* New optimization
+ none

* New Limitations
+ none
+ Hint `nc_header_read_chunk_size` is limited to `NC_MAX_INT`. PnetCDF reads
the file header in chunks. This hint customizes the chunk size.

* Configure options
+ `--enable-large-single-req` has been removed, as PnetCDF now allows a
single read/write request of size larger than 2 GiB.
+ `--disable-file-sync` is now deprecated. This configure option alone does
not provide sufficient data consistency. Users are advised to call
`ncmpi_sync` and `MPI_Barrier` to achieve the desired consistency.
Expand Down Expand Up @@ -146,6 +158,7 @@ This is essentially a placeholder for the next release note ...
See [PR #79](https://github.com/Parallel-NetCDF/PnetCDF/pull/79).

* Bug fixes
+ Fix hint values that are actually used. See commit 41e8ef8.
+ Fix residual values of `v_align` and `r_align` when re-entering the define
mode multiple times.
See [PR #126](https://github.com/Parallel-NetCDF/PnetCDF/pull/126).
Expand All @@ -166,6 +179,9 @@ This is essentially a placeholder for the next release note ...
+ none

* New test program
+ test/largefile/large_header.c - test file header size larger than 2 GiB.
+ test/largefile/large_reqs.c - test a single read/write request of size
larger than 2 GiB.
+ test/testcases/tst_redefine.c - test multiple entries of `ncmpi__enddef`
[PR #126](https://github.com/Parallel-NetCDF/PnetCDF/pull/126).
+ test/testcases/tst_symlink.c - test `NC_CLOBBER` on a symbolic link.
Expand Down
1 change: 0 additions & 1 deletion src/binding/f77/pnetcdf.inc.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
parameter (PNETCDF_SUBFILING = @ENABLE_SUBFILING@)
parameter (PNETCDF_RELAX_COORD_BOUND = @RELAX_COORD_BOUND@)
parameter (PNETCDF_DEBUG_MODE = @PNETCDF_DEBUG@)
parameter (PNETCDF_LARGE_SINGLE_REQ = @ENABLE_LARGE_SINGLE_REQ@)
parameter (PNETCDF_NULL_BYTE_HEADER_PADDING = @ENABLE_NULL_BYTE_HEADER_PADDING@)
parameter (PNETCDF_BYTE_SWAP_IN_PLACE = @IN_PLACE_SWAP@) ! -1 means auto
parameter (PNETCDF_BURST_BUFFERING = @ENABLE_BURST_BUFFER@)
Expand Down
1 change: 0 additions & 1 deletion src/binding/f90/nfmpi_constants.fh.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
PNETCDF_SUBFILING = @ENABLE_SUBFILING@, &
PNETCDF_RELAX_COORD_BOUND = @RELAX_COORD_BOUND@, &
PNETCDF_DEBUG_MODE = @PNETCDF_DEBUG@, &
PNETCDF_LARGE_SINGLE_REQ = @ENABLE_LARGE_SINGLE_REQ@, &
PNETCDF_NULL_BYTE_HEADER_PADDING = @ENABLE_NULL_BYTE_HEADER_PADDING@, &
PNETCDF_BYTE_SWAP_IN_PLACE = @IN_PLACE_SWAP@, & ! -1 means auto
PNETCDF_BURST_BUFFERING = @ENABLE_BURST_BUFFER@, &
Expand Down
60 changes: 42 additions & 18 deletions src/drivers/common/create_imaptype.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ int
ncmpii_create_imaptype(int ndims,
const MPI_Offset *count, /* [ndims] */
const MPI_Offset *imap, /* [ndims] */
MPI_Datatype ptype, /* element type in buftype */
MPI_Datatype itype, /* element type */
MPI_Datatype *imaptype)/* out */
{
int dim, el_size, mpireturn;
Expand Down Expand Up @@ -55,7 +55,8 @@ ncmpii_create_imaptype(int ndims,
if (dim == -1) /* imap is a contiguous layout */
return NC_NOERR;

MPI_Type_size(ptype, &el_size);
/* itype: element data type (MPI primitive type) */
MPI_Type_size(itype, &el_size);

/* We have a true varm call, as imap gives non-contiguous layout.
* User buffer will be packed (write case) or unpacked (read case)
Expand All @@ -66,17 +67,28 @@ ncmpii_create_imaptype(int ndims,
* dim is the first dimension (C order, eg. ZYX) that has
* non-contiguous imap.
*/
if (imap_contig_blocklen != (int)imap_contig_blocklen)
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
if (count[dim] != (int)count[dim] || imap[dim] != (int)imap[dim])
if (imap_contig_blocklen > NC_MAX_INT || count[dim] > NC_MAX_INT ||
imap[dim] > NC_MAX_INT) {
#ifdef HAVE_MPI_LARGE_COUNT
mpireturn = MPI_Type_vector_c(count[dim], imap_contig_blocklen,
imap[dim], itype, imaptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector_c");
DEBUG_RETURN_ERROR(NC_EMPI)
}
#else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)

mpireturn = MPI_Type_vector((int)count[dim], (int)imap_contig_blocklen,
(int)imap[dim], ptype, imaptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector");
DEBUG_RETURN_ERROR(NC_EMPI)
#endif
}
else {
mpireturn = MPI_Type_vector((int)count[dim], (int)imap_contig_blocklen,
(int)imap[dim], itype, imaptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector");
DEBUG_RETURN_ERROR(NC_EMPI)
}
}

mpireturn = MPI_Type_commit(imaptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_commit");
Expand All @@ -85,14 +97,26 @@ ncmpii_create_imaptype(int ndims,

for (dim--; dim>=0; dim--) {
MPI_Datatype tmptype;
if (count[dim] != (int)count[dim])
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)

mpireturn = MPI_Type_create_hvector((int)count[dim], 1,
imap[dim]*el_size, *imaptype, &tmptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector");
DEBUG_RETURN_ERROR(NC_EMPI)
if (count[dim] > NC_MAX_INT) {
#ifdef HAVE_MPI_LARGE_COUNT
mpireturn = MPI_Type_create_hvector_c(count[dim], 1,
imap[dim]*el_size, *imaptype, &tmptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector_c");
DEBUG_RETURN_ERROR(NC_EMPI)
}
#else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
#endif
}
else {
mpireturn = MPI_Type_create_hvector((int)count[dim], 1,
imap[dim]*el_size, *imaptype, &tmptype);
if (mpireturn != MPI_SUCCESS) {
ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector");
DEBUG_RETURN_ERROR(NC_EMPI)
}
}

mpireturn = MPI_Type_free(imaptype);
Expand Down
7 changes: 3 additions & 4 deletions src/drivers/common/dtype_decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,22 +71,21 @@ dtype_filter(MPI_Datatype type)

/* 4-byte integer types */
{
MPI_Datatype int_4byte;
#if (SIZEOF_LONG == 4)
MPI_Datatype uint_4byte;
#endif
#if (SIZEOF_INT == 4)
int_4byte = MPI_INT;
MPI_Datatype int_4byte = MPI_INT;
#if (SIZEOF_LONG == 4)
uint_4byte = MPI_UNSIGNED;
#endif
#elif (SIZEOF_SHORT == 4)
int_4byte = MPI_SHORT;
MPI_Datatype int_4byte = MPI_SHORT;
#if (SIZEOF_LONG == 4)
uint_4byte = MPI_UNSIGNED_SHORT;
#endif
#else
int_4byte = MPI_DATATYPE_NULL; /* no 4-byte type? */
MPI_Datatype int_4byte = MPI_DATATYPE_NULL; /* no 4-byte type? */
#if (SIZEOF_LONG == 4)
uint_4byte = MPI_DATATYPE_NULL;
#endif
Expand Down
Loading

0 comments on commit 1f03432

Please sign in to comment.