From 7c5516b32d9854f3cc4bf47a30dad5a29ff7e799 Mon Sep 17 00:00:00 2001 From: wkliao Date: Sun, 10 Mar 2024 14:16:31 -0500 Subject: [PATCH 1/9] silence warning of variable set but not used --- src/drivers/common/dtype_decode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/drivers/common/dtype_decode.c b/src/drivers/common/dtype_decode.c index d476543e6..e83f7f23c 100644 --- a/src/drivers/common/dtype_decode.c +++ b/src/drivers/common/dtype_decode.c @@ -71,22 +71,21 @@ dtype_filter(MPI_Datatype type) /* 4-byte integer types */ { - MPI_Datatype int_4byte; #if (SIZEOF_LONG == 4) MPI_Datatype uint_4byte; #endif #if (SIZEOF_INT == 4) - int_4byte = MPI_INT; + MPI_Datatype int_4byte = MPI_INT; #if (SIZEOF_LONG == 4) uint_4byte = MPI_UNSIGNED; #endif #elif (SIZEOF_SHORT == 4) - int_4byte = MPI_SHORT; + MPI_Datatype int_4byte = MPI_SHORT; #if (SIZEOF_LONG == 4) uint_4byte = MPI_UNSIGNED_SHORT; #endif #else - int_4byte = MPI_DATATYPE_NULL; /* no 4-byte type? */ + MPI_Datatype int_4byte = MPI_DATATYPE_NULL; /* no 4-byte type? 
*/ #if (SIZEOF_LONG == 4) uint_4byte = MPI_DATATYPE_NULL; #endif From 9ddfb8a68377c0a853fb622a0cbcc27fe00456a8 Mon Sep 17 00:00:00 2001 From: wkliao Date: Sun, 10 Mar 2024 14:25:26 -0500 Subject: [PATCH 2/9] Deprecate configure option --enable-large-single-req --- configure.ac | 20 -------------------- src/binding/f77/pnetcdf.inc.in | 1 - src/binding/f90/nfmpi_constants.fh.in | 1 - src/include/pnetcdf.h.in | 1 - src/utils/pnetcdf-config.in | 14 -------------- 5 files changed, 37 deletions(-) diff --git a/configure.ac b/configure.ac index 6ab83a56e..be0d823a3 100644 --- a/configure.ac +++ b/configure.ac @@ -139,7 +139,6 @@ AH_TEMPLATE([HDF5_VER_GE_1_10_4], [Define if HDF5 version is at least 1.10 AH_TEMPLATE([NETCDF_GE_4_5_0], [Define if NetCDF version is at least 4.5.0]) AH_TEMPLATE([PNC_MALLOC_TRACE], [Define if to enable malloc tracing]) AH_TEMPLATE([RELAX_COORD_BOUND], [Define if relaxed coordinate check is enabled]) -AH_TEMPLATE([ENABLE_LARGE_SINGLE_REQ], [Define if to enable large single MPI-IO request]) AH_TEMPLATE([ENABLE_NULL_BYTE_HEADER_PADDING], [Define if to enable strict null-byte padding in file header]) AH_TEMPLATE([ENABLE_BURST_BUFFER], [Define if to enable burst buffer feature]) AH_TEMPLATE([PNETCDF_PROFILING], [Define if to enable PnetCDF internal performance profiling]) @@ -2256,21 +2255,6 @@ AC_SUBST(LATEX) AC_SUBST(DVIPDF) AM_CONDITIONAL([HAS_LATEX], [test "x$has_latex" = xyes]) -AC_ARG_ENABLE([large-single-req], - [AS_HELP_STRING([--enable-large-single-req], - [Enable large (> 2 GiB) single request in individual MPI-IO - calls. Note some MPI-IO libraries may not support this. 
- @<:@default: disabled@:>@])], - [large_single_req=${enableval}], [large_single_req=no] -) -ENABLE_LARGE_SINGLE_REQ=0 -if test "x${large_single_req}" = xyes ; then - ENABLE_LARGE_SINGLE_REQ=1 - AC_DEFINE(ENABLE_LARGE_SINGLE_REQ) -fi -AM_CONDITIONAL(ENABLE_LARGE_SINGLE_REQ, [test x$large_single_req = xyes]) -AC_SUBST(ENABLE_LARGE_SINGLE_REQ) - AC_ARG_ENABLE([large-file-test], [AS_HELP_STRING([--enable-large-file-test], [Enable testing for large (>4GB) file/variable I/O. Note @@ -2722,10 +2706,6 @@ if test "x${enable_subfiling}" = xyes; then echo "\ Subfiling support - enabled" fi -if test "x${large_single_req}" = xyes ; then - echo "\ - Allow large (> 2GiB) single MPI-IO requests - enabled" -fi if test "x${thread_safe}" = xyes ; then echo "\ Thread-safe capability - enabled" diff --git a/src/binding/f77/pnetcdf.inc.in b/src/binding/f77/pnetcdf.inc.in index 20dea2fea..fc7a8a4e2 100644 --- a/src/binding/f77/pnetcdf.inc.in +++ b/src/binding/f77/pnetcdf.inc.in @@ -37,7 +37,6 @@ parameter (PNETCDF_SUBFILING = @ENABLE_SUBFILING@) parameter (PNETCDF_RELAX_COORD_BOUND = @RELAX_COORD_BOUND@) parameter (PNETCDF_DEBUG_MODE = @PNETCDF_DEBUG@) - parameter (PNETCDF_LARGE_SINGLE_REQ = @ENABLE_LARGE_SINGLE_REQ@) parameter (PNETCDF_NULL_BYTE_HEADER_PADDING = @ENABLE_NULL_BYTE_HEADER_PADDING@) parameter (PNETCDF_BYTE_SWAP_IN_PLACE = @IN_PLACE_SWAP@) ! -1 means auto parameter (PNETCDF_BURST_BUFFERING = @ENABLE_BURST_BUFFER@) diff --git a/src/binding/f90/nfmpi_constants.fh.in b/src/binding/f90/nfmpi_constants.fh.in index bc1778bde..ea6d31d37 100644 --- a/src/binding/f90/nfmpi_constants.fh.in +++ b/src/binding/f90/nfmpi_constants.fh.in @@ -24,7 +24,6 @@ PNETCDF_SUBFILING = @ENABLE_SUBFILING@, & PNETCDF_RELAX_COORD_BOUND = @RELAX_COORD_BOUND@, & PNETCDF_DEBUG_MODE = @PNETCDF_DEBUG@, & - PNETCDF_LARGE_SINGLE_REQ = @ENABLE_LARGE_SINGLE_REQ@, & PNETCDF_NULL_BYTE_HEADER_PADDING = @ENABLE_NULL_BYTE_HEADER_PADDING@, & PNETCDF_BYTE_SWAP_IN_PLACE = @IN_PLACE_SWAP@, & ! 
-1 means auto PNETCDF_BURST_BUFFERING = @ENABLE_BURST_BUFFER@, & diff --git a/src/include/pnetcdf.h.in b/src/include/pnetcdf.h.in index a26fc2776..945451a38 100644 --- a/src/include/pnetcdf.h.in +++ b/src/include/pnetcdf.h.in @@ -27,7 +27,6 @@ #define PNETCDF_SUBFILING @ENABLE_SUBFILING@ #define PNETCDF_RELAX_COORD_BOUND @RELAX_COORD_BOUND@ #define PNETCDF_DEBUG_MODE @PNETCDF_DEBUG@ -#define PNETCDF_LARGE_SINGLE_REQ @ENABLE_LARGE_SINGLE_REQ@ #define PNETCDF_NULL_BYTE_HEADER_PADDING @ENABLE_NULL_BYTE_HEADER_PADDING@ #define PNETCDF_BYTE_SWAP_IN_PLACE @IN_PLACE_SWAP@ #define PNETCDF_BURST_BUFFERING @ENABLE_BURST_BUFFER@ diff --git a/src/utils/pnetcdf-config.in b/src/utils/pnetcdf-config.in index 4a53e42c6..4fcb4a62c 100644 --- a/src/utils/pnetcdf-config.in +++ b/src/utils/pnetcdf-config.in @@ -61,7 +61,6 @@ has_fortran="@has_fortran@" has_cxx="@has_mpicxx@" enable_erange_fill="@ENABLE_ERANGE_FILL@" enable_subfiling="@ENABLE_SUBFILING@" -enable_large_single_req="@ENABLE_LARGE_SINGLE_REQ@" enable_null_byte_header_padding="@ENABLE_NULL_BYTE_HEADER_PADDING@" enable_relax_coord_bound="@RELAX_COORD_BOUND@" is_bigendian="@ac_cv_c_bigendian@" @@ -175,11 +174,6 @@ if test "x$enable_subfiling" = x1; then else echo " --subfiling -> disabled" fi -if test "x$enable_large_single_req" = x1; then - echo " --large-single-req -> enabled" -else - echo " --large-single-req -> disabled" -fi if test "x$enable_null_byte_header_padding" = x1; then echo " --null-byte-header-padding -> enabled" else @@ -389,14 +383,6 @@ else fi ;; - --large-single-req) -if test "x$enable_large_single_req" = x1; then - echo "enabled" -else - echo "disabled" -fi - ;; - --null-byte-header-padding) if test "x$enable_null_byte_header_padding" = x1; then echo "enabled" From 0dcf6284106e42faa5e8fb9ab1aa5d52917ff892 Mon Sep 17 00:00:00 2001 From: wkliao Date: Sun, 10 Mar 2024 14:36:04 -0500 Subject: [PATCH 3/9] fix: hint values actually used --- src/drivers/ncmpio/ncmpio_util.c | 42 +++++++++++++++++++++----------- 1 
file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/drivers/ncmpio/ncmpio_util.c b/src/drivers/ncmpio/ncmpio_util.c index cd87bf7ab..33ea50235 100644 --- a/src/drivers/ncmpio/ncmpio_util.c +++ b/src/drivers/ncmpio/ncmpio_util.c @@ -46,13 +46,14 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, ncp->env_h_align = 0; if (user_info != MPI_INFO_NULL) { /* aligns the size of header extent of a newly created file */ - MPI_Info_get(user_info, "nc_header_align_size", MPI_MAX_INFO_VAL-1, value, - &flag); + MPI_Info_get(user_info, "nc_header_align_size", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { errno = 0; /* errno must set to zero before calling strtoll */ ncp->env_h_align = strtoll(value, NULL, 10); if (errno != 0) ncp->env_h_align = 0; else if (ncp->env_h_align < 0) ncp->env_h_align = 0; + sprintf(value, "%lld", ncp->env_h_align); } } if (!flag) sprintf(value, "%d", FILE_ALIGNMENT_DEFAULT); @@ -61,12 +62,14 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, ncp->env_v_align = 0; if (user_info != MPI_INFO_NULL) { /* aligns starting file offsets of individual fixed-size variables */ - MPI_Info_get(user_info, "nc_var_align_size", MPI_MAX_INFO_VAL-1, value, &flag); + MPI_Info_get(user_info, "nc_var_align_size", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { errno = 0; /* errno must set to zero before calling strtoll */ ncp->env_v_align = strtoll(value, NULL, 10); if (errno != 0) ncp->env_v_align = 0; else if (ncp->env_v_align < 0) ncp->env_v_align = 0; + sprintf(value, "%lld", ncp->env_v_align); } } if (!flag) sprintf(value, "%d", FILE_ALIGNMENT_DEFAULT); @@ -75,13 +78,14 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, ncp->env_r_align = 0; if (user_info != MPI_INFO_NULL) { /* aligns starting file offset of the record variable section */ - MPI_Info_get(user_info, "nc_record_align_size", MPI_MAX_INFO_VAL-1, value, - &flag); + MPI_Info_get(user_info, "nc_record_align_size", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { errno = 0; /* errno must set to zero before calling 
strtoll */ ncp->env_r_align = strtoll(value, NULL, 10); if (errno != 0) ncp->env_r_align = 0; else if (ncp->env_r_align < 0) ncp->env_r_align = 0; + sprintf(value, "%lld", ncp->env_r_align); } } if (!flag) sprintf(value, "%d", FILE_ALIGNMENT_DEFAULT); @@ -89,13 +93,18 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, if (user_info != MPI_INFO_NULL) { /* header reading chunk size */ - MPI_Info_get(user_info, "nc_header_read_chunk_size", MPI_MAX_INFO_VAL-1, value, - &flag); + MPI_Info_get(user_info, "nc_header_read_chunk_size", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { + long int chunk; errno = 0; /* errno must set to zero before calling strtoll */ - ncp->chunk = (int) strtol(value, NULL, 10); + chunk = (int) strtol(value, NULL, 10); if (errno != 0) ncp->chunk = 0; - else if (ncp->chunk < 0) ncp->chunk = 0; + else if (ncp->chunk < 0) + ncp->chunk = 0; + else if (chunk > INT_MAX) /* limit to INT_MAX */ + ncp->chunk = INT_MAX; + sprintf(value, "%d", ncp->chunk); } } if (!flag) sprintf(value, "%d", NC_DEFAULT_CHUNKSIZE); @@ -126,12 +135,14 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, /* temporal buffer size used to pack noncontiguous aggregated user * buffers when calling ncmpi_wait/wait_all, Default 16 MiB */ - MPI_Info_get(user_info, "nc_ibuf_size", MPI_MAX_INFO_VAL-1, value, &flag); + MPI_Info_get(user_info, "nc_ibuf_size", MPI_MAX_INFO_VAL-1, value, + &flag); if (flag) { MPI_Offset ibuf_size; errno = 0; /* errno must set to zero before calling strtoll */ ibuf_size = strtoll(value, NULL, 10); if (errno == 0 && ncp->ibuf_size > 0) ncp->ibuf_size = ibuf_size; + sprintf(value, "%lld", ncp->ibuf_size); } } if (!flag) sprintf(value, "%d", NC_DEFAULT_IBUF_SIZE); @@ -139,7 +150,8 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, #ifdef ENABLE_SUBFILING if (user_info != MPI_INFO_NULL) { - MPI_Info_get(user_info, "pnetcdf_subfiling", MPI_MAX_INFO_VAL-1, value, &flag); + MPI_Info_get(user_info, "pnetcdf_subfiling", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { if (strcasecmp(value, 
"enable") == 0) ncp->subfile_mode = 1; @@ -149,12 +161,14 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, MPI_Info_set(info_used, "pnetcdf_subfiling", value); if (user_info != MPI_INFO_NULL) { - MPI_Info_get(user_info, "nc_num_subfiles", MPI_MAX_INFO_VAL-1, value, &flag); + MPI_Info_get(user_info, "nc_num_subfiles", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { errno = 0; ncp->num_subfiles = strtoll(value, NULL, 10); if (errno != 0) ncp->num_subfiles = 0; else if (ncp->num_subfiles < 0) ncp->num_subfiles = 0; + sprintf(value, "%d", ncp->num_subfiles); } } if (!flag) strcpy(value, "0"); @@ -168,8 +182,8 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, if (user_info != MPI_INFO_NULL) { /* read/write file header using MPI collective APIs */ - MPI_Info_get(user_info, "nc_header_collective", MPI_MAX_INFO_VAL-1, value, - &flag); + MPI_Info_get(user_info, "nc_header_collective", MPI_MAX_INFO_VAL-1, + value, &flag); if (flag) { if (strcasecmp(value, "true") == 0) fSet((ncp)->flags, NC_HCOLL); From acb7e419df41584f9f293026d0a7312d2624cadb Mon Sep 17 00:00:00 2001 From: wkliao Date: Sun, 10 Mar 2024 14:52:23 -0500 Subject: [PATCH 4/9] prevent user environment setting of PNETCDF_HINTS to interfere --- benchmarks/C/parallel_run.sh | 3 +++ benchmarks/FLASH-IO/parallel_run.sh | 3 +++ examples/C/parallel_run.sh | 3 +++ examples/CXX/parallel_run.sh | 3 +++ examples/F77/parallel_run.sh | 3 +++ examples/F90/parallel_run.sh | 3 +++ examples/adios/parallel_run.sh | 3 +++ examples/burst_buffer/parallel_run.sh | 3 +++ examples/tutorial/parallel_run.sh | 3 +++ src/utils/ncvalidator/seq_runs.sh | 3 +++ src/utils/ncvalidator/wrap_runs.sh | 3 +++ test/C/parallel_run.sh | 3 +++ test/C/seq_runs.sh | 3 +++ test/CXX/parallel_run.sh | 3 +++ test/CXX/wrap_runs.sh | 3 +++ test/F90/parallel_run.sh | 3 +++ test/F90/seq_runs.sh | 3 +++ test/F90/wrap_runs.sh | 3 +++ test/adios/parallel_run.sh | 3 +++ test/adios/wrap_runs.sh | 3 +++ test/burst_buffer/parallel_run.sh | 3 +++ test/burst_buffer/wrap_runs.sh | 3 +++ 
test/cdf_format/parallel_run.sh | 3 +++ test/cdf_format/seq_runs.sh | 3 +++ test/cdf_format/wrap_runs.sh | 3 +++ test/header/parallel_run.sh | 3 +++ test/header/seq_runs.sh | 3 +++ test/largefile/parallel_run.sh | 3 +++ test/largefile/seq_runs.sh | 3 +++ test/largefile/wrap_runs.sh | 3 +++ test/nc4/parallel_run.sh | 3 +++ test/nc4/wrap_runs.sh | 3 +++ test/nc_test/seq_runs.sh | 3 +++ test/nc_test/wrap_runs.sh | 3 +++ test/nf90_test/seq_runs.sh | 3 +++ test/nf_test/seq_runs.sh | 3 +++ test/nonblocking/parallel_run.sh | 3 +++ test/nonblocking/seq_runs.sh | 3 +++ test/nonblocking/wrap_runs.sh | 3 +++ test/subfile/parallel_run.sh | 3 +++ test/subfile/seq_runs.sh | 3 +++ test/testcases/parallel_run.sh | 3 +++ test/testcases/seq_runs.sh | 3 +++ test/testcases/wrap_runs.sh | 3 +++ 44 files changed, 132 insertions(+) diff --git a/benchmarks/C/parallel_run.sh b/benchmarks/C/parallel_run.sh index 1bde705f6..3570a0366 100755 --- a/benchmarks/C/parallel_run.sh +++ b/benchmarks/C/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/benchmarks/FLASH-IO/parallel_run.sh b/benchmarks/FLASH-IO/parallel_run.sh index ee7a1bad5..0ce77be9a 100755 --- a/benchmarks/FLASH-IO/parallel_run.sh +++ b/benchmarks/FLASH-IO/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/C/parallel_run.sh b/examples/C/parallel_run.sh index 9bec579d4..12408e3b0 100755 --- a/examples/C/parallel_run.sh +++ b/examples/C/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in 
${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/CXX/parallel_run.sh b/examples/CXX/parallel_run.sh index 66a6185a5..7228a6bf3 100755 --- a/examples/CXX/parallel_run.sh +++ b/examples/CXX/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/F77/parallel_run.sh b/examples/F77/parallel_run.sh index e9df58d0f..87204499a 100755 --- a/examples/F77/parallel_run.sh +++ b/examples/F77/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/F90/parallel_run.sh b/examples/F90/parallel_run.sh index ed7542ce9..d640e642a 100755 --- a/examples/F90/parallel_run.sh +++ b/examples/F90/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/adios/parallel_run.sh b/examples/adios/parallel_run.sh index 2c9e42691..b2034e5e7 100755 --- a/examples/adios/parallel_run.sh +++ b/examples/adios/parallel_run.sh @@ -23,6 +23,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/examples/burst_buffer/parallel_run.sh b/examples/burst_buffer/parallel_run.sh index 1062a7cc5..53e84be32 100755 --- a/examples/burst_buffer/parallel_run.sh +++ b/examples/burst_buffer/parallel_run.sh @@ -27,6 +27,9 
@@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do # echo "---- exec=$i" for j in ${safe_modes} ; do diff --git a/examples/tutorial/parallel_run.sh b/examples/tutorial/parallel_run.sh index a3a1f96ba..db37383cc 100755 --- a/examples/tutorial/parallel_run.sh +++ b/examples/tutorial/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/src/utils/ncvalidator/seq_runs.sh b/src/utils/ncvalidator/seq_runs.sh index 40b973c7c..0e7f6316d 100755 --- a/src/utils/ncvalidator/seq_runs.sh +++ b/src/utils/ncvalidator/seq_runs.sh @@ -18,6 +18,9 @@ if [ ! -f ${VALIDATOR} ]; then exit 1 fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${BAD_FILES} ; do if [ ! -f ${srcdir}/$i ]; then echo "Failed: input test file ${srcdir}/$i does not exit" diff --git a/src/utils/ncvalidator/wrap_runs.sh b/src/utils/ncvalidator/wrap_runs.sh index d4aac3041..39c5a985e 100755 --- a/src/utils/ncvalidator/wrap_runs.sh +++ b/src/utils/ncvalidator/wrap_runs.sh @@ -7,6 +7,9 @@ # Exit immediately if a command exits with a non-zero status. 
set -e +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${ENULLPAD_FILES} ; do for j in 0 1 ; do export PNETCDF_SAFE_MODE=$j diff --git a/test/C/parallel_run.sh b/test/C/parallel_run.sh index 16590ee72..ee4f362c7 100755 --- a/test/C/parallel_run.sh +++ b/test/C/parallel_run.sh @@ -23,6 +23,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode export PNETCDF_HINTS="nc_header_collective=true" diff --git a/test/C/seq_runs.sh b/test/C/seq_runs.sh index 3f54932f1..6d07fc893 100755 --- a/test/C/seq_runs.sh +++ b/test/C/seq_runs.sh @@ -19,6 +19,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/CXX/parallel_run.sh b/test/CXX/parallel_run.sh index f3689fd01..37b012ad1 100755 --- a/test/CXX/parallel_run.sh +++ b/test/CXX/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${TESTPROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/CXX/wrap_runs.sh b/test/CXX/wrap_runs.sh index 8410edfa2..15473151a 100755 --- a/test/CXX/wrap_runs.sh +++ b/test/CXX/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/F90/parallel_run.sh b/test/F90/parallel_run.sh index 5e904334d..da500944d 100755 --- a/test/F90/parallel_run.sh +++ b/test/F90/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of 
PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${PARALLEL_PROGS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/F90/seq_runs.sh b/test/F90/seq_runs.sh index 06dbb1598..03f82de7f 100755 --- a/test/F90/seq_runs.sh +++ b/test/F90/seq_runs.sh @@ -10,6 +10,9 @@ set -e VALIDATOR=../../src/utils/ncvalidator/ncvalidator NCMPIDIFF=../../src/utils/ncmpidiff/ncmpidiff +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + ${TESTSEQRUN} ./tst_io ${TESTOUTDIR} ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/tst_io1.nc # remove file system type prefix if there is any diff --git a/test/F90/wrap_runs.sh b/test/F90/wrap_runs.sh index 943228ab6..2c6f88622 100755 --- a/test/F90/wrap_runs.sh +++ b/test/F90/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/adios/parallel_run.sh b/test/adios/parallel_run.sh index d55df6acf..f9abd6589 100755 --- a/test/adios/parallel_run.sh +++ b/test/adios/parallel_run.sh @@ -21,6 +21,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/adios/wrap_runs.sh b/test/adios/wrap_runs.sh index 6021d97dd..e619098d0 100755 --- a/test/adios/wrap_runs.sh +++ b/test/adios/wrap_runs.sh @@ -18,6 +18,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/burst_buffer/parallel_run.sh b/test/burst_buffer/parallel_run.sh index b9442e4bc..ed3d0bf4b 100755 --- 
a/test/burst_buffer/parallel_run.sh +++ b/test/burst_buffer/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${TESTPROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/burst_buffer/wrap_runs.sh b/test/burst_buffer/wrap_runs.sh index 7abf6bc28..308ccfc19 100755 --- a/test/burst_buffer/wrap_runs.sh +++ b/test/burst_buffer/wrap_runs.sh @@ -20,6 +20,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/cdf_format/parallel_run.sh b/test/cdf_format/parallel_run.sh index 2f1e841a7..dd4f842da 100755 --- a/test/cdf_format/parallel_run.sh +++ b/test/cdf_format/parallel_run.sh @@ -25,6 +25,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode export PNETCDF_HINTS="nc_header_collective=true" diff --git a/test/cdf_format/seq_runs.sh b/test/cdf_format/seq_runs.sh index c3ffd5148..0c076b3e9 100755 --- a/test/cdf_format/seq_runs.sh +++ b/test/cdf_format/seq_runs.sh @@ -7,6 +7,9 @@ # Exit immediately if a command exits with a non-zero status. 
set -e +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + ${TESTSEQRUN} ./test_inq_format ${srcdir} # the followings check files with corrupted header diff --git a/test/cdf_format/wrap_runs.sh b/test/cdf_format/wrap_runs.sh index 796d258a9..44f2949f5 100755 --- a/test/cdf_format/wrap_runs.sh +++ b/test/cdf_format/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/header/parallel_run.sh b/test/header/parallel_run.sh index 3e3edf009..1ee813a4e 100755 --- a/test/header/parallel_run.sh +++ b/test/header/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/header/seq_runs.sh b/test/header/seq_runs.sh index 506ad9d69..15dd4287d 100755 --- a/test/header/seq_runs.sh +++ b/test/header/seq_runs.sh @@ -19,6 +19,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + # header consistency tests are designed to run on more than one MPI process for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j diff --git a/test/largefile/parallel_run.sh b/test/largefile/parallel_run.sh index 503d67649..0a7bf33e4 100755 --- a/test/largefile/parallel_run.sh +++ b/test/largefile/parallel_run.sh @@ -20,6 +20,9 @@ MPIRUN=`echo ${TESTMPIRUN} | ${SED} -e "s/NP/$1/g"` # turn off safe mode for large tests export PNETCDF_SAFE_MODE=0 +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do # echo "set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.nc diff --git 
a/test/largefile/seq_runs.sh b/test/largefile/seq_runs.sh index 739653f12..e28955773 100755 --- a/test/largefile/seq_runs.sh +++ b/test/largefile/seq_runs.sh @@ -15,6 +15,9 @@ OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` # disable safe mode, as large tests already run slow export PNETCDF_SAFE_MODE=0 +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${TESTPROGRAMS}; do ${TESTSEQRUN} ./$i ${TESTOUTDIR}/$i.nc ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/$i.nc diff --git a/test/largefile/wrap_runs.sh b/test/largefile/wrap_runs.sh index d30a5c69e..b298798fd 100755 --- a/test/largefile/wrap_runs.sh +++ b/test/largefile/wrap_runs.sh @@ -16,6 +16,9 @@ OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` # disable safe mode, as large tests already run slow export PNETCDF_SAFE_MODE=0 +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.nc ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/$outfile.nc rm -f ${OUTDIR}/$outfile.nc diff --git a/test/nc4/parallel_run.sh b/test/nc4/parallel_run.sh index 5cdaae2e3..6d6956749 100755 --- a/test/nc4/parallel_run.sh +++ b/test/nc4/parallel_run.sh @@ -21,6 +21,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/nc4/wrap_runs.sh b/test/nc4/wrap_runs.sh index c8e092672..10d76802b 100755 --- a/test/nc4/wrap_runs.sh +++ b/test/nc4/wrap_runs.sh @@ -19,6 +19,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/nc_test/seq_runs.sh b/test/nc_test/seq_runs.sh index a19511342..87c7abd21 100755 --- a/test/nc_test/seq_runs.sh +++ b/test/nc_test/seq_runs.sh @@ -12,6 +12,9 
@@ VALIDATOR=../../src/utils/ncvalidator/ncvalidator # remove file system type prefix if there is any OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + # tst_nofill.c creates two files: tst_nofill.nc.fill and tst_nofill.nc.nofill ${TESTSEQRUN} ./tst_nofill ${TESTOUTDIR}/tst_nofill.nc ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/tst_nofill.nc.fill diff --git a/test/nc_test/wrap_runs.sh b/test/nc_test/wrap_runs.sh index 3c4975d54..7a0dcf514 100755 --- a/test/nc_test/wrap_runs.sh +++ b/test/nc_test/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/nf90_test/seq_runs.sh b/test/nf90_test/seq_runs.sh index 66b2a033c..00d21995c 100755 --- a/test/nf90_test/seq_runs.sh +++ b/test/nf90_test/seq_runs.sh @@ -15,6 +15,9 @@ export PNETCDF_SAFE_MODE=0 # remove file system type prefix if there is any OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + rm -f ${OUTDIR}/test.nc rm -f ${OUTDIR}/scratch.nc rm -f ${OUTDIR}/tooth-fairy.nc diff --git a/test/nf_test/seq_runs.sh b/test/nf_test/seq_runs.sh index beff65d36..cb419929f 100755 --- a/test/nf_test/seq_runs.sh +++ b/test/nf_test/seq_runs.sh @@ -15,6 +15,9 @@ export PNETCDF_SAFE_MODE=0 # remove file system type prefix if there is any OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + rm -f ${OUTDIR}/test.nc rm -f ${OUTDIR}/scratch.nc rm -f ${OUTDIR}/tooth-fairy.nc diff --git a/test/nonblocking/parallel_run.sh b/test/nonblocking/parallel_run.sh index aa73d1a43..6857bc5ca 100755 --- a/test/nonblocking/parallel_run.sh +++ b/test/nonblocking/parallel_run.sh @@ -24,6 +24,9 @@ else 
safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/nonblocking/seq_runs.sh b/test/nonblocking/seq_runs.sh index 80e6f4f4b..38b7a2efe 100755 --- a/test/nonblocking/seq_runs.sh +++ b/test/nonblocking/seq_runs.sh @@ -15,6 +15,9 @@ NCMPIDIFF=../../src/utils/ncmpidiff/ncmpidiff # remove file system type prefix if there is any OUTDIR=`echo "$TESTOUTDIR" | cut -d: -f2-` +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + ${TESTSEQRUN} ./mcoll_perf ${TESTOUTDIR}/testfile # seq is not available on FreeBSD otherwise we can use: for j in `seq 0 9` for j in 0 1 2 3 4 5 6 7 8 9 ; do diff --git a/test/nonblocking/wrap_runs.sh b/test/nonblocking/wrap_runs.sh index bb8423446..1128276a8 100755 --- a/test/nonblocking/wrap_runs.sh +++ b/test/nonblocking/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" diff --git a/test/subfile/parallel_run.sh b/test/subfile/parallel_run.sh index a9396026d..f033eec30 100755 --- a/test/subfile/parallel_run.sh +++ b/test/subfile/parallel_run.sh @@ -24,6 +24,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/subfile/seq_runs.sh b/test/subfile/seq_runs.sh index cbcde89ec..a4e425d07 100755 --- a/test/subfile/seq_runs.sh +++ b/test/subfile/seq_runs.sh @@ -20,6 +20,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${TESTPROGRAMS} ; do for j in ${safe_modes} ; do export 
PNETCDF_SAFE_MODE=$j diff --git a/test/testcases/parallel_run.sh b/test/testcases/parallel_run.sh index f4d802102..ea592079c 100755 --- a/test/testcases/parallel_run.sh +++ b/test/testcases/parallel_run.sh @@ -27,6 +27,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do if test "$j" = 1 ; then # test only in safe mode diff --git a/test/testcases/seq_runs.sh b/test/testcases/seq_runs.sh index 2df26ffba..e65878baf 100755 --- a/test/testcases/seq_runs.sh +++ b/test/testcases/seq_runs.sh @@ -9,6 +9,9 @@ set -e VALIDATOR=../../src/utils/ncvalidator/ncvalidator +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + ${TESTSEQRUN} ./tst_version ${TESTSEQRUN} ./put_all_kinds ${TESTOUTDIR}/put_all_kinds.nc diff --git a/test/testcases/wrap_runs.sh b/test/testcases/wrap_runs.sh index e8612fc44..1d481496f 100755 --- a/test/testcases/wrap_runs.sh +++ b/test/testcases/wrap_runs.sh @@ -22,6 +22,9 @@ else safe_modes="0" fi +# prevent user environment setting of PNETCDF_HINTS to interfere +unset PNETCDF_HINTS + for j in ${safe_modes} ; do export PNETCDF_SAFE_MODE=$j # echo "---- set PNETCDF_SAFE_MODE ${PNETCDF_SAFE_MODE}" From f764652882b36ccff5a0bfa89afbec9c75ca866f Mon Sep 17 00:00:00 2001 From: wkliao Date: Sun, 10 Mar 2024 19:56:21 -0500 Subject: [PATCH 5/9] echo when testing burst buffering --- test/C/parallel_run.sh | 2 +- test/C/seq_runs.sh | 3 ++- test/CXX/parallel_run.sh | 2 +- test/CXX/wrap_runs.sh | 4 +++- test/F90/parallel_run.sh | 2 +- test/F90/seq_runs.sh | 2 ++ test/F90/wrap_runs.sh | 4 +++- test/cdf_format/parallel_run.sh | 2 +- test/cdf_format/seq_runs.sh | 3 ++- test/cdf_format/wrap_runs.sh | 4 +++- test/header/parallel_run.sh | 2 +- test/header/seq_runs.sh | 3 ++- test/largefile/parallel_run.sh | 2 +- test/largefile/wrap_runs.sh | 3 ++- test/nc_test/seq_runs.sh | 3 ++- test/nc_test/wrap_runs.sh | 4 +++- 
test/nf90_test/seq_runs.sh | 3 ++- test/nf_test/seq_runs.sh | 3 ++- test/nonblocking/parallel_run.sh | 2 +- test/nonblocking/seq_runs.sh | 3 ++- test/nonblocking/wrap_runs.sh | 4 +++- test/subfile/parallel_run.sh | 2 +- test/subfile/seq_runs.sh | 4 +++- test/testcases/parallel_run.sh | 2 +- test/testcases/seq_runs.sh | 4 +++- test/testcases/wrap_runs.sh | 4 +++- 26 files changed, 51 insertions(+), 25 deletions(-) diff --git a/test/C/parallel_run.sh b/test/C/parallel_run.sh index ee4f362c7..34bcfc9cc 100755 --- a/test/C/parallel_run.sh +++ b/test/C/parallel_run.sh @@ -39,7 +39,7 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./pres_temp_4D_wr ${TESTOUTDIR}/pres_temp_4D.bb.nc diff --git a/test/C/seq_runs.sh b/test/C/seq_runs.sh index 6d07fc893..1098e2896 100755 --- a/test/C/seq_runs.sh +++ b/test/C/seq_runs.sh @@ -32,7 +32,8 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} ./pres_temp_4D_wr ${TESTOUTDIR}/pres_temp_4D.bb.nc ${TESTSEQRUN} ./pres_temp_4D_rd ${TESTOUTDIR}/pres_temp_4D.bb.nc diff --git a/test/CXX/parallel_run.sh b/test/CXX/parallel_run.sh index 37b012ad1..df7faa15b 100755 --- a/test/CXX/parallel_run.sh +++ b/test/CXX/parallel_run.sh @@ -41,7 +41,7 @@ for i in ${TESTPROGRAMS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export 
PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc diff --git a/test/CXX/wrap_runs.sh b/test/CXX/wrap_runs.sh index 15473151a..d34ca7b47 100755 --- a/test/CXX/wrap_runs.sh +++ b/test/CXX/wrap_runs.sh @@ -33,7 +33,9 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/F90/parallel_run.sh b/test/F90/parallel_run.sh index da500944d..4fb349f50 100755 --- a/test/F90/parallel_run.sh +++ b/test/F90/parallel_run.sh @@ -41,7 +41,7 @@ for i in ${PARALLEL_PROGS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc diff --git a/test/F90/seq_runs.sh b/test/F90/seq_runs.sh index 03f82de7f..9a816ad2d 100755 --- a/test/F90/seq_runs.sh +++ b/test/F90/seq_runs.sh @@ -20,6 +20,8 @@ OUTDIR=$(echo $TESTOUTDIR | cut -d: -f2) mv ${OUTDIR}/tst_io1.nc ${OUTDIR}/tst_io1.nc0 if test "x${ENABLE_BURST_BUFFER}" = x1 ; then + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} ./tst_io ${TESTOUTDIR} unset PNETCDF_HINTS diff --git a/test/F90/wrap_runs.sh b/test/F90/wrap_runs.sh index 2c6f88622..716aacf06 100755 --- a/test/F90/wrap_runs.sh +++ b/test/F90/wrap_runs.sh @@ -33,7 +33,9 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- 
testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/cdf_format/parallel_run.sh b/test/cdf_format/parallel_run.sh index dd4f842da..cb46e4f46 100755 --- a/test/cdf_format/parallel_run.sh +++ b/test/cdf_format/parallel_run.sh @@ -44,7 +44,7 @@ for j in ${safe_modes} ; do ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/dim_cdf12.nc if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./test_inq_format ${srcdir} diff --git a/test/cdf_format/seq_runs.sh b/test/cdf_format/seq_runs.sh index 0c076b3e9..732f08d2d 100755 --- a/test/cdf_format/seq_runs.sh +++ b/test/cdf_format/seq_runs.sh @@ -19,7 +19,8 @@ ${TESTSEQRUN} ./tst_corrupt ${srcdir} # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" diff --git a/test/cdf_format/wrap_runs.sh b/test/cdf_format/wrap_runs.sh index 44f2949f5..c749c7dbe 100755 --- a/test/cdf_format/wrap_runs.sh +++ b/test/cdf_format/wrap_runs.sh @@ -32,7 +32,9 @@ for j in ${safe_modes} ; do ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/$outfile.nc if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/header/parallel_run.sh 
b/test/header/parallel_run.sh index 1ee813a4e..b336d7ff5 100755 --- a/test/header/parallel_run.sh +++ b/test/header/parallel_run.sh @@ -41,7 +41,7 @@ for i in ${check_PROGRAMS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc diff --git a/test/header/seq_runs.sh b/test/header/seq_runs.sh index 15dd4287d..475c8da8a 100755 --- a/test/header/seq_runs.sh +++ b/test/header/seq_runs.sh @@ -31,7 +31,8 @@ for j in ${safe_modes} ; do if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/largefile/parallel_run.sh b/test/largefile/parallel_run.sh index 0a7bf33e4..edb4a4741 100755 --- a/test/largefile/parallel_run.sh +++ b/test/largefile/parallel_run.sh @@ -33,7 +33,7 @@ for i in ${check_PROGRAMS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc unset PNETCDF_HINTS diff --git a/test/largefile/wrap_runs.sh b/test/largefile/wrap_runs.sh index b298798fd..d9295eb10 100755 --- a/test/largefile/wrap_runs.sh +++ b/test/largefile/wrap_runs.sh @@ -26,7 +26,8 @@ rm -f ${OUTDIR}/$outfile.nc # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export 
PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.nc unset PNETCDF_HINTS diff --git a/test/nc_test/seq_runs.sh b/test/nc_test/seq_runs.sh index 87c7abd21..e8cd748cb 100755 --- a/test/nc_test/seq_runs.sh +++ b/test/nc_test/seq_runs.sh @@ -48,7 +48,8 @@ ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/test.nc # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" rm -f ${OUTDIR}/tooth-fairy.nc ${OUTDIR}/scratch.nc ${OUTDIR}/test.nc diff --git a/test/nc_test/wrap_runs.sh b/test/nc_test/wrap_runs.sh index 7a0dcf514..343accd9f 100755 --- a/test/nc_test/wrap_runs.sh +++ b/test/nc_test/wrap_runs.sh @@ -34,7 +34,9 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/nf90_test/seq_runs.sh b/test/nf90_test/seq_runs.sh index 00d21995c..ade7a6ad3 100755 --- a/test/nf90_test/seq_runs.sh +++ b/test/nf90_test/seq_runs.sh @@ -46,7 +46,8 @@ if test "${ENABLE_NETCDF4}" = 1 ; then fi if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" rm -f ${OUTDIR}/test.nc diff --git a/test/nf_test/seq_runs.sh b/test/nf_test/seq_runs.sh index cb419929f..f76a7a28c 100755 --- a/test/nf_test/seq_runs.sh +++ b/test/nf_test/seq_runs.sh @@ -47,7 +47,8 @@ ${TESTSEQRUN} ${VALIDATOR} -q ${TESTOUTDIR}/test.nc if test 
"x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" rm -f ${OUTDIR}/test.nc diff --git a/test/nonblocking/parallel_run.sh b/test/nonblocking/parallel_run.sh index 6857bc5ca..d80f9c914 100755 --- a/test/nonblocking/parallel_run.sh +++ b/test/nonblocking/parallel_run.sh @@ -43,7 +43,7 @@ for i in ${check_PROGRAMS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc diff --git a/test/nonblocking/seq_runs.sh b/test/nonblocking/seq_runs.sh index 38b7a2efe..aa47560d3 100755 --- a/test/nonblocking/seq_runs.sh +++ b/test/nonblocking/seq_runs.sh @@ -27,7 +27,8 @@ done # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} ./mcoll_perf ${TESTOUTDIR}/testfile_bb unset PNETCDF_HINTS diff --git a/test/nonblocking/wrap_runs.sh b/test/nonblocking/wrap_runs.sh index 1128276a8..3fdd5e99c 100755 --- a/test/nonblocking/wrap_runs.sh +++ b/test/nonblocking/wrap_runs.sh @@ -33,7 +33,9 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS diff --git a/test/subfile/parallel_run.sh b/test/subfile/parallel_run.sh index 
f033eec30..3f3b858e3 100755 --- a/test/subfile/parallel_run.sh +++ b/test/subfile/parallel_run.sh @@ -48,7 +48,7 @@ for i in ${check_PROGRAMS} ; do continue if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i -f ${TESTOUTDIR}/$i.bb.nc -s 2 diff --git a/test/subfile/seq_runs.sh b/test/subfile/seq_runs.sh index a4e425d07..46a34d384 100755 --- a/test/subfile/seq_runs.sh +++ b/test/subfile/seq_runs.sh @@ -39,7 +39,9 @@ for i in ${TESTPROGRAMS} ; do continue if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} ./$i -f ${TESTOUTDIR}/$i.bb.nc -s 2 unset PNETCDF_HINTS diff --git a/test/testcases/parallel_run.sh b/test/testcases/parallel_run.sh index ea592079c..8ff1216a1 100755 --- a/test/testcases/parallel_run.sh +++ b/test/testcases/parallel_run.sh @@ -68,7 +68,7 @@ for i in ${check_PROGRAMS} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "test burst buffering feature" + # echo "---- test burst buffering feature" saved_PNETCDF_HINTS=${PNETCDF_HINTS} export PNETCDF_HINTS="${PNETCDF_HINTS};nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${MPIRUN} ./$i ${TESTOUTDIR}/$i.bb.nc diff --git a/test/testcases/seq_runs.sh b/test/testcases/seq_runs.sh index e65878baf..270536cc6 100755 --- a/test/testcases/seq_runs.sh +++ b/test/testcases/seq_runs.sh @@ -43,7 +43,9 @@ rm -f ${OUT_PATH}/testfile.nc # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + # Run using burst buffer driver 
export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} ./put_all_kinds ${TESTOUTDIR}/put_all_kinds.bb.nc diff --git a/test/testcases/wrap_runs.sh b/test/testcases/wrap_runs.sh index 1d481496f..280216b4d 100755 --- a/test/testcases/wrap_runs.sh +++ b/test/testcases/wrap_runs.sh @@ -33,7 +33,9 @@ for j in ${safe_modes} ; do # echo "" if test "x${ENABLE_BURST_BUFFER}" = x1 ; then - # echo "---- testing burst buffering" + echo "" + echo "---- testing burst buffering" + export PNETCDF_HINTS="nc_burst_buf=enable;nc_burst_buf_dirname=${TESTOUTDIR};nc_burst_buf_overwrite=enable" ${TESTSEQRUN} $1 ${TESTOUTDIR}/$outfile.bb.nc unset PNETCDF_HINTS From 2ba1e0ddee0269d71e8631bae7d29883a3259e3d Mon Sep 17 00:00:00 2001 From: wkliao Date: Wed, 13 Mar 2024 12:44:07 -0500 Subject: [PATCH 6/9] use MPI_Count for large requests --- sneak_peek.md | 13 +- src/drivers/common/create_imaptype.c | 60 +++++-- src/drivers/common/pack_unpack.c | 45 ++++- src/drivers/ncbbio/ncbbio_log_flush.c | 7 - src/drivers/ncbbio/ncbbio_var.c | 31 +++- src/drivers/ncmpio/ncmpio_NC.h | 4 +- src/drivers/ncmpio/ncmpio_attr.m4 | 13 -- src/drivers/ncmpio/ncmpio_enddef.c | 117 ++++++++----- src/drivers/ncmpio/ncmpio_file_io.c | 149 +++++++++------- src/drivers/ncmpio/ncmpio_filetype.c | 100 ++++++++--- src/drivers/ncmpio/ncmpio_fill.c | 116 ++++++++++--- src/drivers/ncmpio/ncmpio_getput.m4 | 61 +++---- src/drivers/ncmpio/ncmpio_i_getput.m4 | 22 +-- src/drivers/ncmpio/ncmpio_i_varn.m4 | 15 +- src/drivers/ncmpio/ncmpio_subfile.c | 22 ++- src/drivers/ncmpio/ncmpio_sync.c | 2 +- src/drivers/ncmpio/ncmpio_util.c | 57 ++++-- src/drivers/ncmpio/ncmpio_vard.c | 21 +-- src/drivers/ncmpio/ncmpio_wait.c | 239 +++++++++++++++++++++----- test/largefile/Makefile.am | 4 +- test/largefile/large_coalesce.c | 40 ----- 21 files changed, 741 insertions(+), 397 deletions(-) diff --git a/sneak_peek.md b/sneak_peek.md index e9e26aa90..6bbfcf88e 100644 --- 
a/sneak_peek.md +++ b/sneak_peek.md @@ -14,14 +14,21 @@ This is essentially a placeholder for the next release note ... ``` ncmpi_put_vara_float_all(ncid, varid, start, count, buf); ``` + + PnetCDF now allows a single read/write request from a process of size + larger than 2 GiB. Large requests are passed down to the MPI-IO library, as + many modern MPI-IO implementations can handle such large requests. This + feature thus deprecates the configure option `--enable-large-single-req`. * New optimization + none * New Limitations - + none + + Hint `nc_header_read_chunk_size` is limited to `NC_MAX_INT`. PnetCDF reads + file header in chunks. This hint customizes the chunk size. * Configure options + + `--enable-large-single-req` has been removed, as PnetCDF now allows a + single read/write request of size larger than 2 GiB. + `--disable-file-sync` is now deprecated. This configure option alone does not provide a sufficient data consistency. Users are suggested to call `ncmpi_sync` and `MPI_Barrier` to achieve a desired consistency. @@ -146,6 +153,7 @@ This is essentially a placeholder for the next release note ... See [PR #79](https://github.com/Parallel-NetCDF/PnetCDF/pull/79). * Bug fixes + + Fix hint values that are actually used. See commit 41e8ef8. + Fix residual values of `v_align` and `r_align` when re-entering the define mode multiple times. See [PR #126](https://github.com/Parallel-NetCDF/PnetCDF/pull/126). @@ -166,6 +174,9 @@ This is essentially a placeholder for the next release note ... + none * New test program + + test/largefile/large_header.c - test file header size larger than 2 GiB. + + test/largefile/large_reqs.c - test a single read/write request of size + larger than 2 GiB. + test/testcases/tst_redefine.c - test multiple entries of `ncmpi__enddef` [PR #126](https://github.com/Parallel-NetCDF/PnetCDF/pull/126). + test/testcases/tst_symlink.c - test `NC_CLOBBER` on a symbolic link. 
diff --git a/src/drivers/common/create_imaptype.c b/src/drivers/common/create_imaptype.c index 80eda1e11..03169d402 100644 --- a/src/drivers/common/create_imaptype.c +++ b/src/drivers/common/create_imaptype.c @@ -27,7 +27,7 @@ int ncmpii_create_imaptype(int ndims, const MPI_Offset *count, /* [ndims] */ const MPI_Offset *imap, /* [ndims] */ - MPI_Datatype ptype, /* element type in buftype */ + MPI_Datatype itype, /* element type */ MPI_Datatype *imaptype)/* out */ { int dim, el_size, mpireturn; @@ -55,7 +55,8 @@ ncmpii_create_imaptype(int ndims, if (dim == -1) /* imap is a contiguous layout */ return NC_NOERR; - MPI_Type_size(ptype, &el_size); + /* itype: element data type (MPI primitive type) */ + MPI_Type_size(itype, &el_size); /* We have a true varm call, as imap gives non-contiguous layout. * User buffer will be packed (write case) or unpacked (read case) @@ -66,17 +67,28 @@ ncmpii_create_imaptype(int ndims, * dim is the first dimension (C order, eg. ZYX) that has * non-contiguous imap. 
*/ - if (imap_contig_blocklen != (int)imap_contig_blocklen) - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - if (count[dim] != (int)count[dim] || imap[dim] != (int)imap[dim]) + if (imap_contig_blocklen > NC_MAX_INT || count[dim] > NC_MAX_INT || + imap[dim] > NC_MAX_INT) { +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_vector_c(count[dim], imap_contig_blocklen, + imap[dim], itype, imaptype); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector_c"); + DEBUG_RETURN_ERROR(NC_EMPI) + } +#else DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - - mpireturn = MPI_Type_vector((int)count[dim], (int)imap_contig_blocklen, - (int)imap[dim], ptype, imaptype); - if (mpireturn != MPI_SUCCESS) { - ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector"); - DEBUG_RETURN_ERROR(NC_EMPI) +#endif } + else { + mpireturn = MPI_Type_vector((int)count[dim], (int)imap_contig_blocklen, + (int)imap[dim], itype, imaptype); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Type_vector"); + DEBUG_RETURN_ERROR(NC_EMPI) + } + } + mpireturn = MPI_Type_commit(imaptype); if (mpireturn != MPI_SUCCESS) { ncmpii_error_mpi2nc(mpireturn,"MPI_Type_commit"); @@ -85,14 +97,26 @@ ncmpii_create_imaptype(int ndims, for (dim--; dim>=0; dim--) { MPI_Datatype tmptype; - if (count[dim] != (int)count[dim]) - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - mpireturn = MPI_Type_create_hvector((int)count[dim], 1, - imap[dim]*el_size, *imaptype, &tmptype); - if (mpireturn != MPI_SUCCESS) { - ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector"); - DEBUG_RETURN_ERROR(NC_EMPI) + if (count[dim] > NC_MAX_INT) { +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hvector_c(count[dim], 1, + imap[dim]*el_size, *imaptype, &tmptype); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector_c"); + DEBUG_RETURN_ERROR(NC_EMPI) + } +#else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) +#endif + } + else { + mpireturn = MPI_Type_create_hvector((int)count[dim], 1, + imap[dim]*el_size, *imaptype, 
&tmptype); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hvector"); + DEBUG_RETURN_ERROR(NC_EMPI) + } } mpireturn = MPI_Type_free(imaptype); diff --git a/src/drivers/common/pack_unpack.c b/src/drivers/common/pack_unpack.c index 681eff069..b6c502184 100644 --- a/src/drivers/common/pack_unpack.c +++ b/src/drivers/common/pack_unpack.c @@ -38,15 +38,29 @@ ncmpii_pack(int ndims, void **cbuf) /* OUT: a contiguous buffer */ { void *lbuf=NULL; - int i, err=NC_NOERR, position, type_size; + int i, err=NC_NOERR, mpireturn; MPI_Offset buf_size, nelems; MPI_Datatype etype, imaptype=MPI_DATATYPE_NULL; - *cbuf = buf; - - MPI_Type_size(buftype, &type_size); +#if MPI_VERSION >= 3 + MPI_Count position, type_size; + mpireturn = MPI_Type_size_c(buftype, &type_size); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_c"); + DEBUG_RETURN_ERROR(err) + } +#else + int position, type_size; + mpireturn = MPI_Type_size(buftype, &type_size); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size"); + DEBUG_RETURN_ERROR(err) + } +#endif buf_size = type_size; + *cbuf = buf; + for (nelems=1, i=0; i= 3 + MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, lbuf, + (MPI_Count)buf_size, &position, MPI_COMM_SELF); +#else + if (buf_size > NC_MAX_INT) + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + MPI_Pack(buf, (int)bufcount, buftype, lbuf, (int)buf_size, &position, MPI_COMM_SELF); +#endif } } @@ -110,10 +128,19 @@ ncmpii_pack(int ndims, /* Step 2: pack lbuf to cbuf if imap is non-contiguous */ if (imaptype != MPI_DATATYPE_NULL) { /* true varm */ /* pack lbuf to cbuf, a contiguous buffer, using imaptype */ - *cbuf = NCI_Malloc((size_t)buf_size); position = 0; +#if MPI_VERSION >= 3 + *cbuf = NCI_Malloc((size_t)buf_size); + MPI_Pack_c(lbuf, 1, imaptype, *cbuf, (MPI_Count)buf_size, &position, + MPI_COMM_SELF); +#else + if (buf_size > NC_MAX_INT) { + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + + *cbuf = 
NCI_Malloc((size_t)buf_size); MPI_Pack(lbuf, 1, imaptype, *cbuf, (int)buf_size, &position, MPI_COMM_SELF); +#endif MPI_Type_free(&imaptype); } else /* reuse lbuf */ diff --git a/src/drivers/ncbbio/ncbbio_log_flush.c b/src/drivers/ncbbio/ncbbio_log_flush.c index 84531c40a..a87ee249a 100644 --- a/src/drivers/ncbbio/ncbbio_log_flush.c +++ b/src/drivers/ncbbio/ncbbio_log_flush.c @@ -102,13 +102,6 @@ int ncbbio_log_flush_core(NC_bb *ncbbp) { if (ncbbp->flushbuffersize > 0 && databuffersize > (size_t)(ncbbp->flushbuffersize)){ databuffersize = ncbbp->flushbuffersize; } - /* Without enabling large_req, we can not post requests larger than 2GiB */ -#ifndef ENABLE_LARGE_SINGLE_REQ - if (databuffersize > 2147483647){ - databuffersize = 2147483647; - } -#endif - /* We assume user will not issue single request larger than 2GiB wwithout enabling large_req */ if (databuffersize < (size_t)(ncbbp->maxentrysize)){ databuffersize = ncbbp->maxentrysize; } diff --git a/src/drivers/ncbbio/ncbbio_var.c b/src/drivers/ncbbio/ncbbio_var.c index 45def114b..9bce692a8 100644 --- a/src/drivers/ncbbio/ncbbio_var.c +++ b/src/drivers/ncbbio/ncbbio_var.c @@ -405,7 +405,7 @@ ncbbio_put_varn(void *ncdp, /* When bufcount > 0, this subroutine is called from a flexible API. If * buftype is noncontiguous, we pack buf into cbuf, a contiguous buffer. 
*/ - int isderived, iscontig, elsize, position = 0; + int isderived, iscontig, elsize; MPI_Offset bnelems=0; err = ncmpii_dtype_decode(buftype, &itype, &elsize, &bnelems, @@ -413,12 +413,33 @@ ncbbio_put_varn(void *ncdp, if (err != NC_NOERR) return err; if (!iscontig) { /* pack only if non-contiguous */ + int mpireturn; bnelems *= elsize; - if (bnelems != (int)bnelems) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - cbuf = NCI_Malloc(bnelems); - MPI_Pack((void*)buf, (int)bufcount, buftype, cbuf, (int)bnelems, - &position, MPI_COMM_SELF); + if (bnelems > NC_MAX_INT) { +#if MPI_VERSION >= 3 + MPI_Count position=0; + cbuf = NCI_Malloc(bnelems); + mpireturn = MPI_Pack_c((void*)buf, bufcount, buftype, cbuf, + bnelems, &position, MPI_COMM_SELF); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Pack_c"); + DEBUG_RETURN_ERROR(NC_EMPI) + } +#else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) +#endif + } + else { + int position=0; + cbuf = NCI_Malloc(bnelems); + mpireturn = MPI_Pack((void*)buf, (int)bufcount, buftype, cbuf, + (int)bnelems, &position, MPI_COMM_SELF); + if (mpireturn != MPI_SUCCESS) { + ncmpii_error_mpi2nc(mpireturn,"MPI_Pack"); + DEBUG_RETURN_ERROR(NC_EMPI) + } + } } } diff --git a/src/drivers/ncmpio/ncmpio_NC.h b/src/drivers/ncmpio/ncmpio_NC.h index 2d1a15889..1b7d02007 100644 --- a/src/drivers/ncmpio/ncmpio_NC.h +++ b/src/drivers/ncmpio/ncmpio_NC.h @@ -445,7 +445,7 @@ typedef struct bufferinfo { MPI_File collective_fh; MPI_Offset get_size; /* amount of file read n bytes so far */ MPI_Offset offset; /* current read/write offset in the file */ - int size; /* allocated size of the buffer */ + size_t size; /* allocated size of the buffer */ int version; /* 1, 2, and 5 for CDF-1, 2, and 5 respectively */ int safe_mode;/* 0: disabled, 1: enabled */ int rw_mode; /* 0: independent, 1: collective */ @@ -597,7 +597,7 @@ ncmpio_unpack_xbuf(int format, NC_var *varp, MPI_Offset bufcount, /* Begin defined in ncmpio_file_io.c ----------------------------------------*/ 
extern int ncmpio_read_write(NC *ncp, int rw_flag, int coll_indep, MPI_Offset offset, - int len, MPI_Datatype buf_type, void *buf, + MPI_Offset buf_count, MPI_Datatype buf_type, void *buf, int buftype_is_contig); #endif /* _NC_H */ diff --git a/src/drivers/ncmpio/ncmpio_attr.m4 b/src/drivers/ncmpio/ncmpio_attr.m4 index ec2b95a03..b76cbdff7 100644 --- a/src/drivers/ncmpio/ncmpio_attr.m4 +++ b/src/drivers/ncmpio/ncmpio_attr.m4 @@ -32,7 +32,6 @@ dnl #include #endif #include -#include /* INT_MAX */ #include #include @@ -556,11 +555,6 @@ ncmpio_copy_att(void *ncdp_in, goto err_check; } - if (iattrp->xsz != (int)iattrp->xsz) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto err_check; - } - indx = ncmpio_NC_findattr(ncap_out, nname); if (indx >= 0) { /* name in use in ncap_out */ @@ -996,13 +990,6 @@ ncmpio_put_att(void *ncdp, xsz = x_len_NC_attrV(xtype, nelems); /* xsz is the total aligned size of this attribute */ -#ifndef ENABLE_LARGE_SINGLE_REQ - if (xsz > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EMAX_REQ) - goto err_check; - } -#endif - /* create a normalized character string */ err = ncmpii_utf8_normalize(name, &nname); if (err != NC_NOERR) goto err_check; diff --git a/src/drivers/ncmpio/ncmpio_enddef.c b/src/drivers/ncmpio/ncmpio_enddef.c index cd62593ce..f16a5b932 100644 --- a/src/drivers/ncmpio/ncmpio_enddef.c +++ b/src/drivers/ncmpio/ncmpio_enddef.c @@ -301,7 +301,8 @@ NC_begins(NC *ncp) */ /* Now calculate the starting file offsets for all variables. 
- loop thru vars, first pass is for the 'non-record' vars */ + * loop thru vars, first pass is for the 'non-record' vars + */ end_var = ncp->begin_var; for (j=0, i=0; ivars.ndefined; i++) { /* skip record variables on this pass */ @@ -310,7 +311,7 @@ NC_begins(NC *ncp) if (first_var == NULL) first_var = ncp->vars.value[i]; /* for CDF-1 check if over the file size limit 32-bit integer */ - if (ncp->format == 1 && end_var > X_OFF_MAX) + if (ncp->format == 1 && end_var > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EVARSIZE) /* this will pad out non-record variables with the 4-byte alignment */ @@ -376,8 +377,8 @@ NC_begins(NC *ncp) /* skip non-record variables on this pass */ continue; - /* X_OFF_MAX is the max of 32-bit integer */ - if (ncp->format == 1 && end_var > X_OFF_MAX) + /* NC_MAX_INT is the max of 32-bit integer */ + if (ncp->format == 1 && end_var > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EVARSIZE) /* A few attempts at aligning record variables have failed @@ -403,7 +404,7 @@ NC_begins(NC *ncp) /* check if record size must fit in 32-bits (for CDF-1) */ #if SIZEOF_OFF_T == SIZEOF_SIZE_T && SIZEOF_SIZE_T == 4 - if (ncp->recsize > X_UINT_MAX - ncp->vars.value[i]->len) + if (ncp->recsize > NC_MAX_UINT - ncp->vars.value[i]->len) DEBUG_RETURN_ERROR(NC_EVARSIZE) #endif ncp->recsize += ncp->vars.value[i]->len; @@ -454,37 +455,58 @@ NC_begins(NC *ncp) static int write_NC(NC *ncp) { - void *buf=NULL; - int status=NC_NOERR, mpireturn, err, rank, header_wlen; - size_t bufLen; + int status=NC_NOERR, mpireturn, err, rank; + MPI_Offset i, header_wlen, ntimes; MPI_Status mpistatus; assert(!NC_readonly(ncp)); - if (ncp->begin_var > X_INT_MAX) /* a fatal error */ - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - MPI_Comm_rank(ncp->comm, &rank); + /* In NC_begins(), root's ncp->xsz and ncp->begin_var, root's header + * size and extent, have been broadcast (sync-ed) among processes. + */ + +#ifdef ENABLE_NULL_BYTE_HEADER_PADDING + /* NetCDF classic file formats require the file header null-byte padded. 
+ * PnetCDF's default is not to write the padding area (between ncp->xsz and + * ncp->begin_var). When this padding feature is enabled, we write the + * padding area only when writing the header the first time, i.e. creating + * a new file, or the new header extent becomes larger than the old one. + */ + if (ncp->old == NULL || ncp->begin_var > ncp->old->begin_var) + header_wlen = ncp->begin_var; + else + header_wlen = ncp->xsz; +#else + /* Do not write padding area (between ncp->xsz and ncp->begin_var) */ + header_wlen = ncp->xsz; +#endif + + header_wlen = _RNDUP(header_wlen, X_ALIGN); + + /* if header_wlen is > NC_MAX_INT, then write the header in chunks. + * Note reading file header is already done in chunks. See + * ncmpio_hdr_get_NC(). + */ + ntimes = header_wlen / NC_MAX_INT; + if (header_wlen % NC_MAX_INT) ntimes++; + /* only rank 0's header gets written to the file */ if (rank == 0) { + void *buf=NULL; + int bufCount; + MPI_Offset remain; - /* In NC_begins(), root's ncp->xsz and ncp->begin_var, root's header - * size and extent, have been broadcast (sync-ed) among processes. - */ #ifdef ENABLE_NULL_BYTE_HEADER_PADDING /* NetCDF classic file formats require the file header null-byte * padded. Thus we must calloc a buffer of size equal to file header * extent. */ - header_wlen = (int) ncp->begin_var; - bufLen = _RNDUP(header_wlen, X_ALIGN); - buf = NCI_Calloc(bufLen, 1); + buf = NCI_Calloc(header_wlen, 1); #else /* Do not write padding area (between ncp->xsz and ncp->begin_var) */ - header_wlen = (int) ncp->xsz; - bufLen = _RNDUP(header_wlen, X_ALIGN); - buf = NCI_Malloc(bufLen); + buf = NCI_Malloc(header_wlen); #endif /* copy the entire local header object to buf */ @@ -506,39 +528,42 @@ write_NC(NC *ncp) */ memset(&mpistatus, 0, sizeof(MPI_Status)); #endif - /* write the header extent, not just header size, because NetCDF file - * format specification requires null byte padding for header. 
- */ - if (fIsSet(ncp->flags, NC_HCOLL)) - TRACE_IO(MPI_File_write_at_all)(ncp->collective_fh, 0, buf, - header_wlen, MPI_BYTE, &mpistatus); - else - TRACE_IO(MPI_File_write_at)(ncp->collective_fh, 0, buf, - header_wlen, MPI_BYTE, &mpistatus); - if (mpireturn != MPI_SUCCESS) { - err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at"); - /* write has failed, which is more serious than inconsistency */ - if (err == NC_EFILE) DEBUG_ASSIGN_ERROR(status, NC_EWRITE) - } - else { + /* write the header in chunks */ + remain = header_wlen; + for (i=0; iflags, NC_HCOLL)) + TRACE_IO(MPI_File_write_at_all)(ncp->collective_fh, 0, buf, + bufCount, MPI_BYTE, &mpistatus); + else + TRACE_IO(MPI_File_write_at)(ncp->collective_fh, 0, buf, + bufCount, MPI_BYTE, &mpistatus); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at"); + /* write has failed, which is more serious than inconsistency */ + if (err == NC_EFILE) DEBUG_ASSIGN_ERROR(status, NC_EWRITE) + } + else { #ifdef _USE_MPI_GET_COUNT - int put_size; - MPI_Get_count(&mpistatus, MPI_BYTE, &put_size); - ncp->put_size += put_size; + int put_size; + MPI_Get_count(&mpistatus, MPI_BYTE, &put_size); + ncp->put_size += put_size; #else - ncp->put_size += header_wlen; + ncp->put_size += header_wlen; #endif + } + remain -= NC_MAX_INT; } + NCI_Free(buf); } else if (fIsSet(ncp->flags, NC_HCOLL)) { /* other processes participate the collective call */ - TRACE_IO(MPI_File_write_at_all)(ncp->collective_fh, 0, NULL, - 0, MPI_BYTE, &mpistatus); + for (i=0; icollective_fh, 0, NULL, + 0, MPI_BYTE, &mpistatus); } fn_exit: - if (buf != NULL) NCI_Free(buf); - if (ncp->safe_mode == 1) { /* broadcast root's status, because only root writes to the file */ int root_status = status; @@ -618,11 +643,11 @@ ncmpio_NC_check_vlens(NC *ncp) 2 and 5. 
*/ if (ncp->format >= 5) /* CDF-5 format max */ - vlen_max = X_INT64_MAX - 3; /* "- 3" handles rounded-up size */ + vlen_max = NC_MAX_INT64 - 3; /* "- 3" handles rounded-up size */ else if (ncp->format == 2) /* CDF2 format */ - vlen_max = X_UINT_MAX - 3; /* "- 3" handles rounded-up size */ + vlen_max = NC_MAX_UINT - 3; /* "- 3" handles rounded-up size */ else - vlen_max = X_INT_MAX - 3; /* CDF1 format */ + vlen_max = NC_MAX_INT - 3; /* CDF1 format */ /* Loop through vars, first pass is for non-record variables */ large_fix_vars_count = 0; diff --git a/src/drivers/ncmpio/ncmpio_file_io.c b/src/drivers/ncmpio/ncmpio_file_io.c index e25ce5c5f..d17dfb023 100644 --- a/src/drivers/ncmpio/ncmpio_file_io.c +++ b/src/drivers/ncmpio/ncmpio_file_io.c @@ -10,7 +10,6 @@ #include #include #include /* memset() */ -#include /* INT_MAX */ #include @@ -24,7 +23,7 @@ ncmpio_read_write(NC *ncp, int rw_flag, /* NC_REQ_WR or NC_REQ_RD */ int coll_indep, /* NC_REQ_COLL or NC_REQ_INDEP */ MPI_Offset offset, - int len, + MPI_Offset buf_count, MPI_Datatype buf_type, void *buf, int buftype_is_contig) @@ -32,48 +31,18 @@ ncmpio_read_write(NC *ncp, int status=NC_NOERR, mpireturn, err; MPI_Status mpistatus; MPI_File fh; + MPI_Offset req_size; #if MPI_VERSION >= 3 - MPI_Count req_size; + MPI_Count btype_size; /* MPI_Type_size_x is introduced in MPI 3.0 */ - MPI_Type_size_x(buf_type, &req_size); + MPI_Type_size_x(buf_type, &btype_size); #else - int req_size; - MPI_Type_size(buf_type, &req_size); + int btype_size; + MPI_Type_size(buf_type, &btype_size); #endif - /* request size in bytes */ - req_size *= len; - -#ifndef ENABLE_LARGE_SINGLE_REQ -#if MPI_VERSION >= 3 - if (req_size > INT_MAX) - /* I/O request size > 2 GiB, ROMIO currently does not support a single - * read/write call of amount > 2 GiB - */ -#else - if (req_size < 0) - /* In MPI 2.x and prior, argument "size" in MPI_Type_size is defined - * as of type int. 
When int overflow occurs, the returned value in - * "size" argument may be a negative. This means the aggregated request - * size > 2 GiB. However, ROMIO currently does not support a single - * request with amount > 2 GiB - */ -#endif - { - if (ncp->safe_mode) { - if (rw_flag == NC_REQ_RD) - printf("Error at %s at %d: size of read request (%lld) > INT_MAX\n", - __FILE__,__LINE__,(long long)req_size); - else - printf("Error at %s at %d: size of write request (%lld) > INT_MAX\n", - __FILE__,__LINE__,(long long)req_size); - } - if (coll_indep == NC_REQ_INDEP) DEBUG_RETURN_ERROR(NC_EMAX_REQ) - DEBUG_ASSIGN_ERROR(status, NC_EMAX_REQ) - buf_type = MPI_BYTE; - len = 0; /* allow this process to participate collective call */ - } -#endif + /* request size in bytes, may be > NC_MAX_INT */ + req_size = (MPI_Offset)btype_size * buf_count; #ifdef _USE_MPI_GET_COUNT /* explicitly initialize mpistatus object to 0. For zero-length read, @@ -91,18 +60,35 @@ ncmpio_read_write(NC *ncp, if (rw_flag == NC_REQ_RD) { void *xbuf=buf; - int xlen=len; + int xlen=(int)buf_count; MPI_Datatype xbuf_type=buf_type; - /* if the read buffer is noncontiguous and size is < ncp->ibuf_size, - * allocate a temporary buffer and use it to read, as some MPI, e.g. - * Cray on KNL, can be significantly slow when read buffer is - * noncontiguous. - */ - if (len > 0 && !buftype_is_contig && req_size <= ncp->ibuf_size) { - xlen = req_size; - xbuf = NCI_Malloc(xlen); - xbuf_type = MPI_BYTE; + if (buf_count > NC_MAX_INT) { +#if MPI_VERSION >= 3 + MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type); + MPI_Type_commit(&xbuf_type); + xlen = 1; +#else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) +#endif + } + else if (buf_count > 0 && !buftype_is_contig && + req_size <= ncp->ibuf_size) { + /* if read buffer is noncontiguous and size is < ncp->ibuf_size, + * allocate a temporary buffer and use it to read, as some MPI, + * e.g. Cray on KNL, can be significantly slow when read buffer is + * noncontiguous. 
+ */ + if (req_size > NC_MAX_INT) { + MPI_Type_contiguous((int)buf_count, buf_type, &xbuf_type); + MPI_Type_commit(&xbuf_type); + xlen = 1; + } + else { + xbuf_type = MPI_BYTE; + xlen = req_size; + } + xbuf = NCI_Malloc((size_t)req_size); } if (coll_indep == NC_REQ_COLL) { @@ -139,26 +125,63 @@ ncmpio_read_write(NC *ncp, #endif } if (xbuf != buf) { /* unpack contiguous xbuf to noncontiguous buf */ +#if MPI_VERSION >= 3 + MPI_Count pos=0; + MPI_Unpack_c(xbuf, xlen, &pos, buf, (MPI_Count)buf_count, buf_type, + MPI_COMM_SELF); +#else int pos=0; - MPI_Unpack(xbuf, xlen, &pos, buf, len, buf_type, MPI_COMM_SELF); + MPI_Unpack(xbuf, xlen, &pos, buf, (int)buf_count, buf_type, + MPI_COMM_SELF); +#endif NCI_Free(xbuf); } + if (xbuf_type != buf_type && xbuf_type != MPI_BYTE) + MPI_Type_free(&xbuf_type); } else { /* NC_REQ_WR */ void *xbuf=buf; - int xlen=len; + int xlen=(int)buf_count; MPI_Datatype xbuf_type=buf_type; - /* if the write buffer is noncontiguous and size is < ncp->ibuf_size, - * allocate a temporary buffer and use it to write, as some MPI, e.g. - * Cray on KNL, can be significantly slow when write buffer is - * noncontiguous. - */ - if (len > 0 && !buftype_is_contig && req_size <= ncp->ibuf_size) { - int pos=0; - xlen = req_size; - xbuf = NCI_Malloc(xlen); - MPI_Pack(buf, len, buf_type, xbuf, xlen, &pos, MPI_COMM_SELF); - xbuf_type = MPI_BYTE; + if (buf_count > NC_MAX_INT) { +#if MPI_VERSION >= 3 + MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type); + MPI_Type_commit(&xbuf_type); + xlen = 1; +#else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) +#endif + } + else if (buf_count > 0 && !buftype_is_contig && + req_size <= ncp->ibuf_size) { + /* if write buffer is noncontiguous and size is < ncp->ibuf_size, + * allocate a temporary buffer and use it to write, as some MPI, + * e.g. Cray on KNL, can be significantly slow when write buffer is + * noncontiguous. 
+ */ + if (req_size > NC_MAX_INT) { +#if MPI_VERSION >= 3 + MPI_Count pos=0; + xbuf = NCI_Malloc(req_size); + MPI_Pack_c(buf, (MPI_Count)buf_count, buf_type, xbuf, + (MPI_Count)req_size, &pos, MPI_COMM_SELF); + MPI_Type_contiguous_c((MPI_Count)req_size, MPI_BYTE, &xbuf_type); + MPI_Type_commit(&xbuf_type); + xlen = 1; +#else + /* skip packing write data into a temp buffer */ + xlen = (int)buf_count; + xbuf_type = buf_type; +#endif + } + else { + int pos=0; + xlen = req_size; + xbuf = NCI_Malloc(xlen); + MPI_Pack(buf, (int)buf_count, buf_type, xbuf, xlen, &pos, + MPI_COMM_SELF); + xbuf_type = MPI_BYTE; + } } if (coll_indep == NC_REQ_COLL) { @@ -195,6 +218,8 @@ ncmpio_read_write(NC *ncp, #endif } if (xbuf != buf) NCI_Free(xbuf); + if (xbuf_type != buf_type && xbuf_type != MPI_BYTE) + MPI_Type_free(&xbuf_type); } return status; diff --git a/src/drivers/ncmpio/ncmpio_filetype.c b/src/drivers/ncmpio/ncmpio_filetype.c index b46d9a6ba..4939bc28d 100644 --- a/src/drivers/ncmpio/ncmpio_filetype.c +++ b/src/drivers/ncmpio/ncmpio_filetype.c @@ -12,7 +12,6 @@ #ifdef HAVE_STDLIB_H #include #endif -#include /* INT_MAX */ #include #include @@ -120,23 +119,45 @@ type_create_subarray64(int ndims, MPI_Datatype oldtype, MPI_Datatype *newtype) { - int i, err=NC_NOERR, mpireturn, tag, blklens[3] = {1, 1, 1}; + int i, err=NC_NOERR, mpireturn; + + if (ndims == 0) DEBUG_RETURN_ERROR(NC_EDIMMETA) + +#if MPI_VERSION >= 3 + MPI_Count *sizes, *subsizes, *starts; + + sizes = (MPI_Count*) NCI_Malloc((size_t)ndims * 3 * sizeof(MPI_Count)); + subsizes = sizes + ndims; + starts = subsizes + ndims; + for (i=0; i INT_MAX || array_of_starts[i] > INT_MAX) { - tag = 1; + if (array_of_sizes[i] > NC_MAX_INT || array_of_starts[i] > NC_MAX_INT) { + big_int = 1; break; } } - if (tag == 0) { + if (big_int == 0) { int *sizes, *subsizes, *starts; /* none of dimensions > 2^31-1, we can safely use * MPI_Type_create_subarray */ @@ -157,6 +178,8 @@ type_create_subarray64(int ndims, return err; } + /* now big_int = 
1 */ + /* at least one dimension is of size > 2^31-1 and we cannot use * MPI_Type_create_subarray() to create the newtype, * as its arguments array_of_sizes[] and array_of_starts[] are of @@ -179,9 +202,9 @@ type_create_subarray64(int ndims, if (ndims == 1) { /* blklens argument in MPI_Type_create_hindexed() is of type int */ - blklens[1] = (int)array_of_subsizes[0]; - if (array_of_subsizes[0] != blklens[1]) /* check int overflow */ + if (array_of_subsizes[0] > NC_MAX_INT) /* check int overflow */ DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + blklens[1] = (int)array_of_subsizes[0]; disps[1] = extent * array_of_starts[0]; /* take advantage of disps argument is of type MPI_Aint */ @@ -205,11 +228,13 @@ type_create_subarray64(int ndims, /* count and blocklength arguments in MPI_Type_create_hvector() are of * type int. We need to check for integer overflow */ int count, blocklength; + + if (array_of_subsizes[ndims-2] > NC_MAX_INT || + array_of_subsizes[ndims-1] > NC_MAX_INT) /* check int overflow */ + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + count = (int)array_of_subsizes[ndims-2]; blocklength = (int)array_of_subsizes[ndims-1]; - if (array_of_subsizes[ndims-2] != count || - array_of_subsizes[ndims-1] != blocklength) /* check int overflow */ - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) stride = array_of_sizes[ndims-1] * extent; err = MPI_Type_create_hvector(count, blocklength, stride, oldtype, &type1); @@ -220,9 +245,10 @@ type_create_subarray64(int ndims, /* now iterate through the rest dimensions */ for (i=ndims-3; i>=0; i--) { - count = (int)array_of_subsizes[i]; - if (array_of_subsizes[i] != count) /* check int overflow */ + if (array_of_subsizes[i] > NC_MAX_INT) /* check int overflow */ DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + + count = (int)array_of_subsizes[i]; stride *= array_of_sizes[i+1]; err = MPI_Type_create_hvector(count, 1, stride, type1, &type2); @@ -261,6 +287,7 @@ type_create_subarray64(int ndims, MPI_Type_free(&type1); return NC_NOERR; +#endif } /*----< 
filetype_create_vara() >--------------------------------------------*/ @@ -302,7 +329,11 @@ filetype_create_vara(const NC *ncp, /* previously, request size has been checked and it must > 0 */ if (IS_RECVAR(varp)) { +#if MPI_VERSION >= 3 + MPI_Count blocklength; +#else int blocklength; +#endif MPI_Datatype rectype=MPI_BYTE; #if SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET @@ -310,9 +341,6 @@ filetype_create_vara(const NC *ncp, status = check_recsize_too_big(ncp->recsize); if (status != NC_NOERR) return status; #endif - /* check overflow, because 1st argument of hvector is of type int */ - if (count[0] != (int) count[0]) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - offset += start[0] * ncp->recsize; if (varp->ndims > 1) { @@ -330,11 +358,22 @@ filetype_create_vara(const NC *ncp, blocklength = varp->xsz; } +#if MPI_VERSION >= 3 + /* concatenate number of count[0] subarray types into filetype */ + err = MPI_Type_create_hvector_c(count[0], blocklength, ncp->recsize, + rectype, &filetype); + if (err != MPI_SUCCESS) + return ncmpii_error_mpi2nc(err, "MPI_Type_create_hvector_c"); +#else + /* check overflow, because 1st argument of hvector is of type int */ + if (count[0] > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + /* concatenate number of count[0] subarray types into filetype */ err = MPI_Type_create_hvector((int)count[0], blocklength, ncp->recsize, rectype, &filetype); if (err != MPI_SUCCESS) return ncmpii_error_mpi2nc(err, "MPI_Type_create_hvector"); +#endif if (rectype != MPI_BYTE) MPI_Type_free(&rectype); } @@ -537,32 +576,41 @@ ncmpio_file_set_view(const NC *ncp, MPI_Comm_rank(ncp->comm, &rank); if (rank == 0) { /* prepend the whole file header to filetype */ + MPI_Datatype root_filetype, ftypes[2]; +#if MPI_VERSION >= 3 + MPI_Count blocklens[2]; + MPI_Count disps[2]; +#else int blocklens[2]; MPI_Aint disps[2]; - MPI_Datatype root_filetype, ftypes[2]; + + /* check if header size > 2^31 */ + if (ncp->begin_var > NC_MAX_INT) + DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) 
+#endif /* first block is the header extent */ - blocklens[0] = (int)ncp->begin_var; + blocklens[0] = ncp->begin_var; disps[0] = 0; ftypes[0] = MPI_BYTE; - /* check if header size > 2^31 */ - if (ncp->begin_var > INT_MAX) - DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) - /* second block is filetype, the subarray request(s) to the variable */ blocklens[1] = 1; disps[1] = *offset; ftypes[1] = filetype; -#if SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET - if (*offset > INT_MAX) { +#if (MPI_VERSION < 3) && (SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET) + if (*offset > NC_MAX_INT) { blocklens[1] = 0; DEBUG_ASSIGN_ERROR(status, NC_EAINT_TOO_SMALL) } #endif +#if MPI_VERSION >= 3 + MPI_Type_create_struct_c(2, blocklens, disps, ftypes, &root_filetype); +#else MPI_Type_create_struct(2, blocklens, disps, ftypes, &root_filetype); +#endif MPI_Type_commit(&root_filetype); TRACE_IO(MPI_File_set_view)(fh, 0, MPI_BYTE, root_filetype, "native", diff --git a/src/drivers/ncmpio/ncmpio_fill.c b/src/drivers/ncmpio/ncmpio_fill.c index 3f8de77f4..49fbc902b 100644 --- a/src/drivers/ncmpio/ncmpio_fill.c +++ b/src/drivers/ncmpio/ncmpio_fill.c @@ -154,11 +154,12 @@ fill_var_rec(NC *ncp, NC_var *varp, MPI_Offset recno) /* record number */ { - int err, mpireturn, rank, nprocs; + int err, status=NC_NOERR, mpireturn, rank, nprocs; void *buf; MPI_Offset var_len, start, count, offset; MPI_File fh; MPI_Status mpistatus; + MPI_Datatype bufType; MPI_Comm_rank(ncp->comm, &rank); MPI_Comm_size(ncp->comm, &nprocs); @@ -190,7 +191,8 @@ fill_var_rec(NC *ncp, err = fill_var_buf(varp, count, buf); if (err != NC_NOERR) { NCI_Free(buf); - return err; + count = 0; /* still participate collective calls below */ + status = err; } /* calculate the starting file offset for each process */ @@ -206,18 +208,39 @@ fill_var_rec(NC *ncp, MPI_INFO_NULL); count *= varp->xsz; - if (count != (int)count) DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - if (err != NC_NOERR) + + bufType = MPI_BYTE; + if (count > NC_MAX_INT) { +#if MPI_VERSION >= 3 + 
mpireturn = MPI_Type_contiguous_c((MPI_Count)count, MPI_BYTE, &bufType); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c"); + if (status == NC_NOERR) status = err; + /* still participate collective write with 0-length request */ + count = 0; + } + else { + MPI_Type_commit(&bufType); + count = 1; + } +#else + DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) + if (status == NC_NOERR) status = err; count = 0; /* participate collective write with 0-length request */ +#endif + } /* write to variable collectively */ - TRACE_IO(MPI_File_write_at_all)(fh, offset, buf, (int)count, MPI_BYTE, + TRACE_IO(MPI_File_write_at_all)(fh, offset, buf, (int)count, bufType, &mpistatus); NCI_Free(buf); - if (mpireturn != MPI_SUCCESS) - return ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at_all"); + if (bufType != MPI_BYTE) MPI_Type_free(&bufType); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at_all"); + if (status == NC_NOERR) status = err; + } - if (err != NC_NOERR) return err; + if (status != NC_NOERR) return status; if (IS_RECVAR(varp)) { /* update header's number of records in memory */ /* recno may be differenet among, if safe mode is disabled. 
In @@ -229,18 +252,20 @@ fill_var_rec(NC *ncp, MPI_Offset max_numrecs, numrecs=recno+1; TRACE_COMM(MPI_Allreduce)(&numrecs, &max_numrecs, 1, MPI_OFFSET, MPI_MAX, ncp->comm); - if (mpireturn != MPI_SUCCESS) - return ncmpii_error_mpi2nc(mpireturn, "MPI_Allreduce"); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Allreduce"); + if (status == NC_NOERR) status = err; + } /* In collective mode, ncp->numrecs is always sync-ed among processes */ if (ncp->numrecs < max_numrecs) { err = ncmpio_write_numrecs(ncp, max_numrecs); - if (err != NC_NOERR) return err; + if (status == NC_NOERR) status = err; ncp->numrecs = max_numrecs; } } - return NC_NOERR; + return status; } #ifdef FILL_ONE_VAR_AT_A_TIME @@ -338,19 +363,24 @@ fill_added_recs(NC *ncp, NC *old_ncp) static int fillerup_aggregate(NC *ncp, NC *old_ncp) { - int i, j, k, rank, nprocs, start_vid, recno; - int nVarsFill, *blocklengths; - int mpireturn, err, status=NC_NOERR; + int i, j, k, rank, nprocs, mpireturn, err, status=NC_NOERR; + int start_vid, recno, nVarsFill; char *buf_ptr, *noFill; void *buf; size_t nsegs; MPI_Offset buf_len, var_len, nrecs, start, *count; - MPI_Aint *offset; - MPI_Datatype filetype; + MPI_Datatype filetype, bufType; MPI_File fh; MPI_Status mpistatus; NC_var *varp; +#if MPI_VERSION >= 3 + MPI_Count *blocklengths, *offset; +#else + int *blocklengths; + MPI_Aint *offset; +#endif + MPI_Comm_rank(ncp->comm, &rank); MPI_Comm_size(ncp->comm, &nprocs); @@ -393,7 +423,11 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) /* find the number of write segments (upper bound) */ nsegs = (size_t)(ncp->vars.ndefined + ncp->vars.num_rec_vars * nrecs); count = (MPI_Offset*) NCI_Malloc(nsegs * SIZEOF_MPI_OFFSET); +#if MPI_VERSION >= 3 + offset = (MPI_Count*) NCI_Malloc(nsegs * sizeof(MPI_Count)); +#else offset = (MPI_Aint*) NCI_Malloc(nsegs * SIZEOF_MPI_AINT); +#endif /* calculate each segment's offset and count */ buf_len = 0; /* total write amount, used to allocate buffer */ @@ -481,7 
+515,11 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } /* allocate one contiguous buffer space for all writes */ +#if MPI_VERSION >= 3 + blocklengths = (MPI_Count*) NCI_Malloc((size_t)j * sizeof(MPI_Count)); +#else blocklengths = (int*) NCI_Malloc((size_t)j * SIZEOF_INT); +#endif buf = NCI_Malloc((size_t)buf_len); buf_ptr = (char*)buf; @@ -505,13 +543,17 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } count[k] *= varp->xsz; +#if MPI_VERSION >= 3 + blocklengths[k] = (MPI_Count)count[k]; +#else if (count[k] != (int)count[k]) { DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) if (status == NC_NOERR) status = err; continue; /* skip this request */ } - buf_ptr += count[k]; blocklengths[k] = (int)count[k]; +#endif + buf_ptr += count[k]; k++; } /* k is the number of valid write requests thus far */ @@ -536,13 +578,17 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } count[k] *= varp->xsz; +#if MPI_VERSION >= 3 + blocklengths[k] = (MPI_Count)count[k]; +#else if (count[k] != (int)count[k]) { DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) if (status == NC_NOERR) status = err; continue; /* skip this request */ } - buf_ptr += count[k]; blocklengths[k] = (int)count[k]; +#endif + buf_ptr += count[k]; k++; } } @@ -554,8 +600,13 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } else { /* create fileview: a list of contiguous segment for each variable */ - mpireturn = MPI_Type_create_hindexed(k, blocklengths, offset, MPI_BYTE, - &filetype); +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hindexed_c(k, blocklengths, offset, + MPI_BYTE, &filetype); +#else + mpireturn = MPI_Type_create_hindexed(k, blocklengths, offset, + MPI_BYTE, &filetype); +#endif if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_hindexed"); /* return the first encountered error if there is any */ @@ -575,15 +626,32 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) MPI_INFO_NULL); if (k > 0) MPI_Type_free(&filetype); - if (buf_len != (int)buf_len) { + bufType = MPI_BYTE; + if (buf_len > NC_MAX_INT) { +#if 
MPI_VERSION >= 3 + mpireturn = MPI_Type_contiguous_c((MPI_Count)buf_len, MPI_BYTE, + &bufType); + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c"); + /* return the first encountered error if there is any */ + if (status == NC_NOERR) status = err; + buf_len = 0; + } + else { + MPI_Type_commit(&bufType); + buf_len = 1; + } +#else if (status == NC_NOERR) status = NC_EINTOVERFLOW; - buf_len = 0; /* skip this write */ + buf_len = 0; /* participate collective write with 0-length request */ +#endif } /* write to variable collectively */ - TRACE_IO(MPI_File_write_at_all)(fh, 0, buf, (int)buf_len, MPI_BYTE, &mpistatus); + TRACE_IO(MPI_File_write_at_all)(fh, 0, buf, (int)buf_len, bufType, &mpistatus); NCI_Free(buf); + if (bufType != MPI_BYTE) MPI_Type_free(&bufType); TRACE_IO(MPI_File_set_view)(fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); diff --git a/src/drivers/ncmpio/ncmpio_getput.m4 b/src/drivers/ncmpio/ncmpio_getput.m4 index c13ea2a93..ea94c2afd 100644 --- a/src/drivers/ncmpio/ncmpio_getput.m4 +++ b/src/drivers/ncmpio/ncmpio_getput.m4 @@ -33,7 +33,6 @@ dnl #include #endif #include /* memcpy() */ -#include /* INT_MAX */ #include #include @@ -117,10 +116,10 @@ put_varm(NC *ncp, int reqMode) /* WR/RD/COLL/INDEP */ { void *xbuf=NULL; - int mpireturn, err=NC_NOERR, status=NC_NOERR, nelems=0, buftype_is_contig; + int mpireturn, err=NC_NOERR, status=NC_NOERR, buftype_is_contig; int el_size, need_convert, need_swap, in_place_swap, need_swap_back_buf=0; int coll_indep, xtype_is_contig=1; - MPI_Offset bnelems=0, nbytes=0, offset=0; + MPI_Offset nelems=0, bnelems=0, nbytes=0, offset=0; MPI_Datatype itype, xtype=MPI_BYTE, imaptype, filetype=MPI_BYTE; MPI_File fh; @@ -149,24 +148,18 @@ put_varm(NC *ncp, /* When bufcount is NC_COUNT_IGNORE, this is called from a high-level API. * In this case, buftype must be an MPI predefined data type. 
If this is - * called from a Fortran program, buftype has already been converted to its - * corresponding C type, e.g. MPI_INTEGER is converted to MPI_INT. + * called from a Fortran program, buftype has already been converted to + * its corresponding C type, e.g. MPI_INTEGER is converted to MPI_INT. * if (bufcount == NC_COUNT_IGNORE) assert(buftype == itype); */ - /* because bnelems will be used as the argument "count" in MPI-IO - * write calls and the argument "count" is of type int */ - if (bnelems > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto err_check; - } -#ifndef ENABLE_LARGE_SINGLE_REQ - /* Not all MPI-IO libraries support single requests larger than 2 GiB */ - if (nbytes > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EMAX_REQ) - goto err_check; - } -#endif + /* When bnelems > NC_MAX_INT, we construct a datatype to bypass the + * limitation of MPI file read/write APIs on the argument "count" of type + * int. See ncmpio_read_write() in ncmpio_file_io.c + * + * Note not all MPI-IO libraries support single requests larger than + * NC_MAX_INT. In this case, MPI-IO should report an error. + */ if (nbytes == 0) /* this process has nothing to write */ goto err_check; @@ -226,11 +219,11 @@ put_varm(NC *ncp, if (buf != xbuf) { /* xbuf is a contiguous buffer */ xtype = ncmpii_nc2mpitype(varp->xtype); - nelems = (int)bnelems; + nelems = bnelems; } else { /* we can safely use bufcount and buftype in MPI File read/write */ - nelems = (bufcount == NC_COUNT_IGNORE) ? bnelems : (int)bufcount; + nelems = (bufcount == NC_COUNT_IGNORE) ? 
bnelems : bufcount; xtype = buftype; } @@ -370,8 +363,8 @@ get_varm(NC *ncp, { void *xbuf=NULL; int err=NC_NOERR, status=NC_NOERR, coll_indep, xtype_is_contig=1; - int nelems=0, el_size, buftype_is_contig, need_swap=0, need_convert=0; - MPI_Offset bnelems=0, nbytes=0, offset=0; + int el_size, buftype_is_contig, need_swap=0, need_convert=0; + MPI_Offset nelems=0, bnelems=0, nbytes=0, offset=0; MPI_Datatype itype, xtype=MPI_BYTE, filetype=MPI_BYTE, imaptype=MPI_DATATYPE_NULL; MPI_File fh; @@ -405,19 +398,13 @@ get_varm(NC *ncp, * if (bufcount == NC_COUNT_IGNORE) assert(buftype == itype); */ - /* because bnelems will be used as the argument "count" in MPI-IO - * write calls and the argument "count" is of type int */ - if (bnelems > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto err_check; - } -#ifndef ENABLE_LARGE_SINGLE_REQ - /* Not all MPI-IO libraries support single requests larger than 2 GiB */ - if (nbytes > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EMAX_REQ) - goto err_check; - } -#endif + /* When bnelems > NC_MAX_INT, we construct a datatype to bypass the + * limitation of MPI file read/write APIs on the argument "count" of type + * int. See ncmpio_read_write() in ncmpio_file_io.c + * + * Note not all MPI-IO libraries support single requests larger than + * NC_MAX_INT. In this case, MPI-IO should report an error. + */ if (nbytes == 0) /* this process has nothing to read */ goto err_check; @@ -459,12 +446,12 @@ get_varm(NC *ncp, /* Set nelems and xtype which will be used in MPI read/write */ if (buf != xbuf) { /* xbuf is a contiguous buffer */ - nelems = (int)bnelems; + nelems = bnelems; xtype = ncmpii_nc2mpitype(varp->xtype); } else { /* we can safely use bufcount and buftype in MPI File read/write */ - nelems = (bufcount == NC_COUNT_IGNORE) ? bnelems : (int)bufcount; + nelems = (bufcount == NC_COUNT_IGNORE) ? 
bnelems : bufcount; xtype = buftype; } diff --git a/src/drivers/ncmpio/ncmpio_i_getput.m4 b/src/drivers/ncmpio/ncmpio_i_getput.m4 index 48e8485bf..f69296c16 100644 --- a/src/drivers/ncmpio/ncmpio_i_getput.m4 +++ b/src/drivers/ncmpio/ncmpio_i_getput.m4 @@ -28,7 +28,6 @@ dnl #ifdef HAVE_STDLIB_H #include #endif -#include /* INT_MAX */ #include #include /* memcpy() */ @@ -187,10 +186,13 @@ ncmpio_igetput_varm(NC *ncp, */ MPI_Offset bnelems=0; - if (bufcount > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto fn_exit; - } + /* When bufcount > NC_MAX_INT, we construct a datatype to bypass the + * limitation of MPI file read/write APIs on the argument "count" of + * type int. See ncmpio_read_write() in ncmpio_file_io.c + * + * Note not all MPI-IO libraries support single requests larger than + * NC_MAX_INT. In this case, MPI-IO should report an error. + */ /* itype (primitive MPI data type) from buftype * isize is the size of itype in bytes @@ -210,12 +212,10 @@ ncmpio_igetput_varm(NC *ncp, /* nbytes is the amount of this vara request in bytes */ nbytes = nelems * xsize; -#ifndef ENABLE_LARGE_SINGLE_REQ - if (nbytes > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EMAX_REQ) - goto fn_exit; - } -#endif + /* Skip checking nbytes against NC_MAX_INT. Note not all MPI-IO libraries + * support single requests larger than NC_MAX_INT. In this case, MPI-IO + * should report an error. 
+ */ /* for nonblocking API, return now if request size is zero */ if (nbytes == 0) { diff --git a/src/drivers/ncmpio/ncmpio_i_varn.m4 b/src/drivers/ncmpio/ncmpio_i_varn.m4 index 9dddf1ae7..4a6b21e39 100644 --- a/src/drivers/ncmpio/ncmpio_i_varn.m4 +++ b/src/drivers/ncmpio/ncmpio_i_varn.m4 @@ -28,7 +28,6 @@ dnl #include #endif #include /* memcpy() */ -#include /* INT_MAX */ #include #include @@ -132,11 +131,6 @@ igetput_varn(NC *ncp, */ MPI_Offset bnelems=0; - if (bufcount > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto fn_exit; - } - /* itype (primitive MPI data type) from buftype * isize is the size of itype in bytes * bnelems is the number of itype elements in one buftype @@ -159,13 +153,6 @@ igetput_varn(NC *ncp, /* for nonblocking API, return now if request size is zero */ if (nbytes == 0) goto fn_exit; -#ifndef ENABLE_LARGE_SINGLE_REQ - if (nbytes > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EMAX_REQ) - goto fn_exit; - } -#endif - memChunk = varp->ndims * SIZEOF_MPI_OFFSET; /* check if type conversion and Endianness byte swap is needed */ @@ -431,7 +418,7 @@ igetput_varn(NC *ncp, * later used in the wait call to unpack xbuf using buftype to buf. 
*/ MPI_Type_dup(buftype, &lead_req->buftype); - lead_req->bufcount = (int)bufcount; + lead_req->bufcount = bufcount; } } diff --git a/src/drivers/ncmpio/ncmpio_subfile.c b/src/drivers/ncmpio/ncmpio_subfile.c index 92dfc25a6..0586c9a42 100644 --- a/src/drivers/ncmpio/ncmpio_subfile.c +++ b/src/drivers/ncmpio/ncmpio_subfile.c @@ -782,14 +782,24 @@ ncmpio_subfile_getput_vars(NC *ncp, /* NOTE: no conversion and byte swap are performed here as they are done underneath layer */ if (!buftype_is_contig && bufcount > 0 && bnelems > 0) { - int position=0; MPI_Offset outsize = bnelems * bufcount * el_size; - if (outsize != (int)outsize) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) - if (bufcount != (int)bufcount) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) cbuf = NCI_Malloc((size_t)outsize); - if (fIsSet(reqMode, NC_REQ_WR)) - MPI_Pack(buf, (int)bufcount, buftype, cbuf, (int)outsize, - &position, MPI_COMM_SELF); + if (fIsSet(reqMode, NC_REQ_WR)) { +#if MPI_VERSION >= 3 + MPI_Count position=0; + MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, cbuf, + (MPI_Count)outsize, &position, MPI_COMM_SELF); +#else + int position=0; + if (bufcount > NC_MAX_INT || outsize > NC_MAX_INT) { + NCI_Free(cbuf); + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + } + else + MPI_Pack(buf, (int)bufcount, buftype, cbuf, (int)outsize, + &position, MPI_COMM_SELF); +#endif + } } else cbuf = (void *)buf; diff --git a/src/drivers/ncmpio/ncmpio_sync.c b/src/drivers/ncmpio/ncmpio_sync.c index d6c34f3d5..59f0f92cf 100644 --- a/src/drivers/ncmpio/ncmpio_sync.c +++ b/src/drivers/ncmpio/ncmpio_sync.c @@ -94,7 +94,7 @@ ncmpio_write_numrecs(NC *ncp, if (new_numrecs > ncp->numrecs) ncp->numrecs = new_numrecs; if (ncp->format < 5) { - if (ncp->numrecs != (int)ncp->numrecs) + if (ncp->numrecs > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) len = X_SIZEOF_SIZE_T; err = ncmpix_put_uint32((void**)&buf, (uint)ncp->numrecs); diff --git a/src/drivers/ncmpio/ncmpio_util.c b/src/drivers/ncmpio/ncmpio_util.c index 33ea50235..6579c9425 100644 --- 
a/src/drivers/ncmpio/ncmpio_util.c +++ b/src/drivers/ncmpio/ncmpio_util.c @@ -9,10 +9,9 @@ #endif #include -#include /* strtoll() is first introducted in C99 */ +#include /* strtoll() is first introduced in C99 */ #include /* strcpy() */ #include /* strcasecmp() */ -#include /* INT_MAX */ #include #include #include @@ -102,8 +101,8 @@ void ncmpio_set_pnetcdf_hints(NC *ncp, if (errno != 0) ncp->chunk = 0; else if (ncp->chunk < 0) ncp->chunk = 0; - else if (chunk > INT_MAX) /* limit to INT_MAX */ - ncp->chunk = INT_MAX; + else if (chunk > NC_MAX_INT) /* limit to NC_MAX_INT */ + ncp->chunk = NC_MAX_INT; sprintf(value, "%d", ncp->chunk); } } @@ -379,13 +378,12 @@ ncmpio_pack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ void *buf, /* user buffer */ void *xbuf) /* already allocated, in external type */ { - int err=NC_NOERR, position, free_lbuf=0, free_cbuf=0; + int err=NC_NOERR, free_lbuf=0, free_cbuf=0; void *lbuf=NULL, *cbuf=NULL; MPI_Offset ibuf_size; /* check byte size of buf (internal representation) */ ibuf_size = nelems * el_size; - if (ibuf_size > INT_MAX) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) /* Step 1: if buftype is not contiguous, i.e. a noncontiguous MPI * derived datatype, pack buf into a contiguous buffer, lbuf, @@ -404,13 +402,19 @@ ncmpio_pack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ if (buf != lbuf) { /* pack buf into lbuf based on buftype */ - if (bufcount > INT_MAX) { +#if MPI_VERSION >= 3 + MPI_Count position = 0; + MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, lbuf, + (MPI_Count)ibuf_size, &position, MPI_COMM_SELF); +#else + int position = 0; + if (bufcount > NC_MAX_INT || ibuf_size > NC_MAX_INT) { if (free_lbuf) NCI_Free(lbuf); DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) } - position = 0; MPI_Pack(buf, (int)bufcount, buftype, lbuf, (int)ibuf_size, &position, MPI_COMM_SELF); +#endif } } else /* for contiguous case, we reuse buf */ @@ -433,9 +437,17 @@ ncmpio_pack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. 
*/ } /* pack lbuf to cbuf based on imaptype */ - position = 0; +#if MPI_VERSION >= 3 + MPI_Count position = 0; + MPI_Pack_c(lbuf, 1, imaptype, cbuf, (MPI_Count)ibuf_size, &position, + MPI_COMM_SELF); +#else + int position = 0; + if (ibuf_size > NC_MAX_INT) + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) MPI_Pack(lbuf, 1, imaptype, cbuf, (int)ibuf_size, &position, MPI_COMM_SELF); +#endif MPI_Type_free(&imaptype); /* lbuf is no longer needed */ @@ -561,14 +573,13 @@ ncmpio_unpack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ void *buf, /* user buffer */ void *xbuf) /* already allocated, in external type */ { - int err=NC_NOERR, el_size, position, free_lbuf=0, free_cbuf=0; + int err=NC_NOERR, el_size, free_lbuf=0, free_cbuf=0; void *lbuf=NULL, *cbuf=NULL; MPI_Offset ibuf_size; /* check byte size of buf (internal representation) */ - MPI_Type_size(itype, &el_size); + MPI_Type_size(itype, &el_size); /* itype is MPI primitive datatype */ ibuf_size = nelems * el_size; - if (ibuf_size > INT_MAX) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) /* Step 1: type-convert and byte-swap xbuf to cbuf, and xbuf contains data * read from file @@ -659,24 +670,40 @@ ncmpio_unpack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. 
*/ /* unpacked cbuf into lbuf based on imap -------------------------------*/ if (imaptype != MPI_DATATYPE_NULL) { /* unpack cbuf to lbuf based on imaptype */ - position = 0; +#if MPI_VERSION >= 3 + MPI_Count position = 0; + MPI_Unpack_c(cbuf, (MPI_Count)ibuf_size, &position, lbuf, 1, imaptype, + MPI_COMM_SELF); +#else + int position = 0; + if (ibuf_size > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + MPI_Unpack(cbuf, (int)ibuf_size, &position, lbuf, 1, imaptype, MPI_COMM_SELF); +#endif MPI_Type_free(&imaptype); } /* unpacked lbuf into buf based on buftype -----------------------------*/ if (!buftype_is_contig && lbuf != buf) { /* no need unpack when buftype is used in MPI_File_read (lbuf == buf) */ - if (bufcount > INT_MAX) { +#if MPI_VERSION >= 3 + MPI_Count position = 0; + MPI_Unpack_c(lbuf, (MPI_Count)ibuf_size, &position, buf, + (MPI_Count)bufcount, buftype, MPI_COMM_SELF); +#else + if (bufcount > NC_MAX_INT) { if (err == NC_NOERR) DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) } else { - position = 0; + int position = 0; + if (ibuf_size > NC_MAX_INT) + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) MPI_Unpack(lbuf, (int)ibuf_size, &position, buf, (int)bufcount, buftype, MPI_COMM_SELF); } +#endif } if (free_cbuf) NCI_Free(cbuf); if (free_lbuf) NCI_Free(lbuf); diff --git a/src/drivers/ncmpio/ncmpio_vard.c b/src/drivers/ncmpio/ncmpio_vard.c index 9c3702d51..be958ee59 100644 --- a/src/drivers/ncmpio/ncmpio_vard.c +++ b/src/drivers/ncmpio/ncmpio_vard.c @@ -22,7 +22,6 @@ #include #endif #include /* memcpy() */ -#include /* INT_MAX */ #include #include @@ -56,9 +55,9 @@ getput_vard(NC *ncp, void *xbuf=NULL; int mpireturn, status=NC_NOERR, err=NC_NOERR, xtype_is_contig=1; int el_size, buftype_is_contig=0, need_swap_back_buf=0; - int nelems=0, need_convert=0, need_swap=0, coll_indep, rw_flag; + int need_convert=0, need_swap=0, coll_indep, rw_flag; MPI_File fh; - MPI_Offset fnelems=0, bnelems=0, offset=0; + MPI_Offset nelems=0, fnelems=0, bnelems=0, offset=0; MPI_Datatype 
etype=MPI_DATATYPE_NULL, xtype=MPI_BYTE; #if MPI_VERSION >= 3 MPI_Count filetype_size=0; @@ -125,14 +124,6 @@ getput_vard(NC *ncp, goto err_check; } -#ifndef ENABLE_LARGE_SINGLE_REQ - /* Not all MPI-IO libraries support single requests larger than 2 GiB */ - if (filetype_size > INT_MAX) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto err_check; - } -#endif - /* get the corresponding MPI datatype of variable external type */ xtype = ncmpii_nc2mpitype(varp->xtype); @@ -174,12 +165,6 @@ getput_vard(NC *ncp, if (err != NC_NOERR) goto err_check; bnelems *= bufcount; -#ifndef ENABLE_LARGE_SINGLE_REQ - if (bnelems != (int)bnelems) { - DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) - goto err_check; - } -#endif /* filetype's number of elements must be equal to request's */ if (fnelems != bnelems) { @@ -252,7 +237,7 @@ getput_vard(NC *ncp, /* Set nelems and xtype which will be used in MPI read/write */ if (buf != xbuf) { /* xbuf is a malloc-ed contiguous buffer */ - nelems = (int)bnelems; + nelems = bnelems; } else { /* we can safely use bufcount and buftype in MPI File read/write. 
diff --git a/src/drivers/ncmpio/ncmpio_wait.c b/src/drivers/ncmpio/ncmpio_wait.c index d1730e572..d90c8d759 100644 --- a/src/drivers/ncmpio/ncmpio_wait.c +++ b/src/drivers/ncmpio/ncmpio_wait.c @@ -19,7 +19,6 @@ #include #include #include /* memset() */ -#include /* INT_MAX */ #include #include @@ -328,8 +327,13 @@ static int construct_filetypes(NC *ncp, NC_lead_req *lead_list, /* NC_REQ_WR or NC_REQ_RD */ int num_reqs, +#if MPI_VERSION >= 3 + MPI_Count *blocklens, /* [num_reqs] temp buffer */ + MPI_Count *disps, /* [num_reqs] temp buffer */ +#else int *blocklens, /* [num_reqs] temp buffer */ MPI_Aint *disps, /* [num_reqs] temp buffer */ +#endif NC_req *reqs, /* [num_reqs] */ MPI_Datatype *filetype) /* OUT */ { @@ -358,7 +362,7 @@ construct_filetypes(NC *ncp, if (ndims == 0) { /* scalar variable */ #if SIZEOF_MPI_AINT < SIZEOF_MPI_OFFSET - if (lead->varp->begin > INT_MAX) { + if (lead->varp->begin > NC_MAX_INT) { DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) fSet(lead->flag, NC_REQ_SKIP); /* skip this request */ if ( lead->status != NULL && @@ -386,7 +390,7 @@ construct_filetypes(NC *ncp, &is_ftype_contig); #if SIZEOF_MPI_AINT < SIZEOF_MPI_OFFSET - if (err == NC_NOERR && offset > INT_MAX) + if (err == NC_NOERR && offset > NC_MAX_INT) DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) #endif disps[j] = (MPI_Aint)offset; @@ -409,14 +413,24 @@ construct_filetypes(NC *ncp, coalesced_len = blocklens[j]; if (last_contig_req >= 0) coalesced_len += blocklens[last_contig_req]; +#if MPI_VERSION >= 3 + if (last_contig_req >= 0 && + disps[j] - disps[last_contig_req] == + blocklens[last_contig_req]) { + blocklens[last_contig_req] = coalesced_len; + j--; + } + else last_contig_req = j; +#else /* if coalesced_len overflows 4-byte int, then skip coalescing */ - if (coalesced_len < INT_MAX && last_contig_req >= 0 && + if (coalesced_len < NC_MAX_INT && last_contig_req >= 0 && disps[j] - disps[last_contig_req] == blocklens[last_contig_req]) { blocklens[last_contig_req] = coalesced_len; j--; } else 
last_contig_req = j; +#endif } else { /* we will construct a filetype, set blocklen to 1 */ @@ -441,15 +455,31 @@ construct_filetypes(NC *ncp, } else { /* if (num_reqs > 1 || (num_reqs == 1 && disps[0] > 0)) */ /* all ftypes[] created fine, now concatenate all ftypes[] */ + int mpireturn; + if (all_ftype_contig) { - err = MPI_Type_create_hindexed(num_reqs, blocklens, disps, - MPI_BYTE, filetype); - MPI_Type_commit(filetype); +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hindexed_c(num_reqs, blocklens, disps, + MPI_BYTE, filetype); +#else + mpireturn = MPI_Type_create_hindexed(num_reqs, blocklens, disps, + MPI_BYTE, filetype); +#endif + if (mpireturn != MPI_SUCCESS) + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_hindexed"); + else { + MPI_Type_commit(filetype); + err = NC_NOERR; + } } else { - int mpireturn; +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_struct_c(num_reqs, blocklens, disps, + ftypes, filetype); +#else mpireturn = MPI_Type_create_struct(num_reqs, blocklens, disps, ftypes, filetype); +#endif if (mpireturn != MPI_SUCCESS) err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_struct"); else { @@ -476,8 +506,13 @@ construct_filetypes(NC *ncp, static int construct_buffertypes(NC_lead_req *lead_list, int num_reqs, +#if MPI_VERSION >= 3 + MPI_Count *blocklens, /* [num_reqs] temp buffer */ + MPI_Count *disps, /* [num_reqs] temp buffer */ +#else int *blocklens, /* [num_reqs] temp buffer */ MPI_Aint *disps, /* [num_reqs] temp buffer */ +#endif NC_req *reqs, /* [num_reqs] */ MPI_Datatype *buf_type) /* OUT */ { @@ -505,13 +540,17 @@ construct_buffertypes(NC_lead_req *lead_list, for (k=1; kvarp->ndims; k++) req_size *= count[k]; } +#if MPI_VERSION >= 3 + blocklens[j] = req_size; +#else /* check int overflow */ - if (req_size > INT_MAX) { /* skip this request */ + if (req_size > NC_MAX_INT) { /* skip this request */ fSet(lead->flag, NC_REQ_SKIP); DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) continue; } blocklens[j] = (int)req_size; +#endif 
MPI_Get_address(reqs[i].xbuf, &ai); if (j == 0) a0 = ai; @@ -523,8 +562,13 @@ construct_buffertypes(NC_lead_req *lead_list, if (num_reqs > 0) { /* concatenate buffer addresses into a single buffer type */ +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hindexed_c(num_reqs, blocklens, disps, + MPI_BYTE, buf_type); +#else mpireturn = MPI_Type_create_hindexed(num_reqs, blocklens, disps, MPI_BYTE, buf_type); +#endif if (mpireturn != MPI_SUCCESS) { int err = ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hindexed"); /* return the first encountered error if there is any */ @@ -1441,9 +1485,15 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ MPI_Datatype *filetype, /* OUT */ MPI_Datatype *buf_type) /* OUT */ { - int i, j, *blocklens, mpireturn; + int i, j, mpireturn; MPI_Offset next_off, next_len, true_nsegs; +#if MPI_VERSION >= 3 + MPI_Count *blocklens; + MPI_Count *disps; +#else + int *blocklens; MPI_Aint *disps; +#endif assert(nsegs > 0); @@ -1469,11 +1519,33 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ } /* j+1 is the coalesced length */ true_nsegs = j + 1; +#if MPI_VERSION >= 3 + blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * true_nsegs); + disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * true_nsegs); + + /* coalesce segs[].off and len to disps[] and blocklens[] */ + disps[0] = segs[0].off; + blocklens[0] = segs[0].len; + for (j=0,i=1; i INT_MAX) { + if (segs[0].len > NC_MAX_INT) { NCI_Free(disps); NCI_Free(blocklens); DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) @@ -1481,7 +1553,7 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ disps[0] = segs[0].off; blocklens[0] = (int)segs[0].len; for (j=0,i=1; i INT_MAX) { + if (segs[i].len > NC_MAX_INT) { NCI_Free(disps); NCI_Free(blocklens); DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) @@ -1496,10 +1568,11 @@ type_create_off_len(MPI_Offset nsegs, /* no. 
off-len pairs */ blocklens[j] = (int)segs[i].len; } } - /* j+1 is the coalesced length */ + /* Now j+1 is the coalesced length */ mpireturn = MPI_Type_create_hindexed(j+1, blocklens, disps, MPI_BYTE, filetype); +#endif if (mpireturn != MPI_SUCCESS) { *filetype = MPI_BYTE; *buf_type = MPI_BYTE; @@ -1527,13 +1600,34 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ } } /* j+1 is the coalesced length */ + +#if MPI_VERSION >= 3 + if (true_nsegs < j + 1) { + blocklens = (MPI_Count*) NCI_Realloc(blocklens, (j+1) * sizeof(MPI_Count)); + disps = (MPI_Count*) NCI_Realloc(disps, (j+1) * sizeof(MPI_Count)); + } + + /* coalesce segs[].off and len to disps[] and blocklens[] */ + disps[0] = segs[0].buf_addr; + blocklens[0] = segs[0].len; + for (j=0,i=1; i INT_MAX) { + if (segs[0].len > NC_MAX_INT) { NCI_Free(disps); NCI_Free(blocklens); DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) @@ -1541,7 +1635,7 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ disps[0] = segs[0].buf_addr; blocklens[0] = (int)segs[0].len; for (j=0,i=1; i INT_MAX) { + if (segs[i].len > NC_MAX_INT) { NCI_Free(disps); NCI_Free(blocklens); DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) @@ -1555,9 +1649,16 @@ type_create_off_len(MPI_Offset nsegs, /* no. 
off-len pairs */ blocklens[j] = (int)segs[i].len; } } +#endif /* j+1 is the coalesced length */ + +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hindexed_c(j+1, blocklens, disps, MPI_BYTE, + buf_type); +#else mpireturn = MPI_Type_create_hindexed(j+1, blocklens, disps, MPI_BYTE, buf_type); +#endif NCI_Free(disps); NCI_Free(blocklens); if (mpireturn != MPI_SUCCESS) { @@ -1594,11 +1695,17 @@ req_aggregation(NC *ncp, int interleaved) /* interleaved in reqs[] */ { int i, gtype, err, status=NC_NOERR, ngroups, mpireturn, buf_len; - int *group_index, *group_type, *f_blocklens, *b_blocklens; - int numLeadReqs; + int *group_index, *group_type, numLeadReqs; +#if MPI_VERSION >= 3 + MPI_Count *blocklens, *f_blocklens, *b_blocklens; + MPI_Count *disps, *f_disps, *b_disps; +#else + int *blocklens, *f_blocklens, *b_blocklens; + MPI_Aint *disps, *f_disps, *b_disps; +#endif NC_lead_req *lead_list; void *buf; /* point to starting buffer, used by MPI-IO call */ - MPI_Aint b_begin, b_addr, *f_disps, *b_disps; + MPI_Aint b_begin, b_addr; MPI_Datatype filetype, buf_type, *ftypes, *btypes; MPI_File fh; MPI_Offset max_end, offset; @@ -1718,19 +1825,28 @@ req_aggregation(NC *ncp, ftypes = (MPI_Datatype*) NCI_Malloc((size_t)ngroups*2*sizeof(MPI_Datatype)); btypes = ftypes + ngroups; - f_blocklens = (int*) NCI_Malloc((size_t)ngroups*2*SIZEOF_INT); - b_blocklens = f_blocklens + ngroups; - f_disps = (MPI_Aint*) NCI_Malloc((size_t)ngroups*2*SIZEOF_MPI_AINT); - b_disps = f_disps + ngroups; + + /* temp buffers, used by multiple calls to construct_filetypes() */ +#if MPI_VERSION >= 3 + blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); + disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); + f_blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * ngroups); + f_disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * ngroups); + b_blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * ngroups); + b_disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * ngroups); +#else + 
blocklens = (int*) NCI_Malloc(sizeof(int) * num_reqs); + disps = (MPI_Aint*) NCI_Malloc(sizeof(MPI_Aint) * num_reqs); + f_blocklens = (int*) NCI_Malloc(sizeof(int) * ngroups); + f_disps = (MPI_Aint*) NCI_Malloc(sizeof(MPI_Aint) * ngroups); + b_blocklens = (int*) NCI_Malloc(sizeof(int) * ngroups); + b_disps = (MPI_Aint*) NCI_Malloc(sizeof(MPI_Aint) * ngroups); +#endif buf = reqs[0].xbuf; /* the buffer of 1st request */ b_disps[0] = 0; /* relative to address of 1st buf */ MPI_Get_address(buf, &b_begin); - /* temp buffers, used by multiple calls to construct_filetypes() */ - int *blocklens = (int*) NCI_Malloc((size_t)num_reqs*SIZEOF_INT); - MPI_Aint *disps = (MPI_Aint*) NCI_Malloc((size_t)num_reqs*SIZEOF_MPI_AINT); - lead_list = (rw_flag == NC_REQ_RD) ? ncp->get_lead_list : ncp->put_lead_list; /* for each group, build a filetype and a buffer type in ftypes[i] and @@ -1837,8 +1953,13 @@ req_aggregation(NC *ncp, } else { /* concatenate all ftypes[] to filetype */ +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_struct_c(ngroups, f_blocklens, f_disps, + ftypes, &filetype); +#else mpireturn = MPI_Type_create_struct(ngroups, f_blocklens, f_disps, ftypes, &filetype); +#endif if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_struct"); /* return the first encountered error if there is any */ @@ -1855,8 +1976,13 @@ req_aggregation(NC *ncp, } /* concatenate all btypes[] to buf_type */ +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_struct_c(ngroups, b_blocklens, b_disps, + btypes, &buf_type); +#else mpireturn = MPI_Type_create_struct(ngroups, b_blocklens, b_disps, btypes, &buf_type); +#endif if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_struct"); /* return the first encountered error if there is any */ @@ -1875,6 +2001,8 @@ req_aggregation(NC *ncp, NCI_Free(ftypes); NCI_Free(f_blocklens); NCI_Free(f_disps); + NCI_Free(b_blocklens); + NCI_Free(b_disps); /* non-lead request list is no longer used 
once fileview and buftype have * been constructed. Free the start arrays allocated at lead requests. @@ -2125,16 +2253,24 @@ mgetput(NC *ncp, int rw_flag, /* NC_REQ_WR or NC_REQ_RD */ int coll_indep) /* NC_REQ_COLL or NC_REQ_INDEP */ { - int i, j, len=0, numLeadReqs, status=NC_NOERR, mpireturn, err, *blocklens; + int i, j, numLeadReqs, status=NC_NOERR, mpireturn, err; void *buf=NULL; NC_lead_req *lead_list; MPI_Datatype filetype, buf_type=MPI_BYTE; - MPI_Offset offset=0; + MPI_Offset offset=0, buf_count=0; MPI_File fh; - MPI_Aint *disps; +#if MPI_VERSION >= 3 + MPI_Count *blocklens; + MPI_Count *disps; + blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); + disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); +#else + int *blocklens; + MPI_Aint *disps; blocklens = (int*) NCI_Malloc((size_t)num_reqs * SIZEOF_INT); disps = (MPI_Aint*) NCI_Malloc((size_t)num_reqs * SIZEOF_MPI_AINT); +#endif lead_list = (rw_flag == NC_REQ_RD) ? ncp->get_lead_list : ncp->put_lead_list; @@ -2154,7 +2290,7 @@ mgetput(NC *ncp, collective calls: setview/read/write */ filetype = MPI_BYTE; buf = NULL; - len = 0; + buf_count = 0; NCI_Free(disps); NCI_Free(blocklens); goto mpi_io; @@ -2164,17 +2300,21 @@ mgetput(NC *ncp, if (num_reqs == 1) { NC_lead_req *lead = lead_list + reqs[0].lead_off; if (fIsSet(lead->flag, NC_REQ_SKIP)) - len = 0; + buf_count = 0; else { +#if MPI_VERSION >= 3 + buf_count = reqs[0].nelems * lead->varp->xsz; +#else MPI_Offset req_size = reqs[0].nelems * lead->varp->xsz; - if (req_size > INT_MAX) { /* skip this request */ + if (req_size > NC_MAX_INT) { /* skip this request */ if (status == NC_NOERR) DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) fSet(lead->flag, NC_REQ_SKIP); - len = 0; /* skip this request */ + buf_count = 0; /* skip this request */ } else - len = (int)req_size; + buf_count = req_size; +#endif } buf = reqs[0].xbuf; } @@ -2193,14 +2333,18 @@ mgetput(NC *ncp, req_size = reqs[i].nelems * lead->varp->xsz; +#if MPI_VERSION >= 3 + 
blocklens[j] = req_size; +#else /* check int overflow */ - if (req_size > INT_MAX) { /* int overflows, skip this request */ + if (req_size > NC_MAX_INT) { /* int overflows, skip this request */ if (status == NC_NOERR) /* keep the 1st encountered error */ DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) fSet(lead->flag, NC_REQ_SKIP); continue; /* skip this request */ } blocklens[j] = (int)req_size; +#endif MPI_Get_address(reqs[i].xbuf, &ai); if (j == 0) { /* first valid request */ @@ -2211,13 +2355,21 @@ mgetput(NC *ncp, req_size = blocklens[last_contig_req]; req_size += blocklens[j]; +#if MPI_VERSION >= 3 + if (ai - a_last_contig == blocklens[last_contig_req]) { + /* user buffer of request j is contiguous from j-1 + * we coalesce j to j-1 */ + blocklens[last_contig_req] += blocklens[j]; + } +#else /* if req_size overflows 4-byte int, then skip coalescing */ - if (req_size <= INT_MAX && + if (req_size <= NC_MAX_INT && ai - a_last_contig == blocklens[last_contig_req]) { /* user buffer of request j is contiguous from j-1 * we coalesce j to j-1 */ blocklens[last_contig_req] += blocklens[j]; } +#endif else if (j > 0) { /* not contiguous from request last_contig_req */ last_contig_req++; @@ -2232,7 +2384,7 @@ mgetput(NC *ncp, if (last_contig_req == 0) { /* user buffers can be concatenated into a contiguous buffer */ buf_type = MPI_BYTE; - len = blocklens[0]; + buf_count = blocklens[0]; } else { /* after possible concatenating the user buffers, the true number @@ -2240,8 +2392,13 @@ mgetput(NC *ncp, int num_contig_reqs = last_contig_req+1; /* concatenate buffer addresses into a single buffer type */ +#if MPI_VERSION >= 3 + mpireturn = MPI_Type_create_hindexed_c(num_contig_reqs, blocklens, + disps, MPI_BYTE, &buf_type); +#else mpireturn = MPI_Type_create_hindexed(num_contig_reqs, blocklens, disps, MPI_BYTE, &buf_type); +#endif if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn,"MPI_Type_create_hindexed"); /* return the first encountered error if there is any */ 
@@ -2256,7 +2413,7 @@ mgetput(NC *ncp, } } - len = 1; + buf_count = 1; } } /* if (buf_type == MPI_BYTE) then the whole buf is contiguous */ @@ -2292,12 +2449,12 @@ mgetput(NC *ncp, if (err != NC_NOERR) { if (status == NC_NOERR) status = err; if (coll_indep == NC_REQ_INDEP) return status; - len = 0; + buf_count = 0; } /* call MPI_File_read/MPI_File_write */ - err = ncmpio_read_write(ncp, rw_flag, coll_indep, offset, len, buf_type, - buf, ((buf_type == MPI_BYTE) ? 1 : 0)); + err = ncmpio_read_write(ncp, rw_flag, coll_indep, offset, buf_count, + buf_type, buf, ((buf_type == MPI_BYTE) ? 1 : 0)); if (status == NC_NOERR) status = err; if (buf_type != MPI_BYTE) MPI_Type_free(&buf_type); diff --git a/test/largefile/Makefile.am b/test/largefile/Makefile.am index 1bab6f09a..e5931f646 100644 --- a/test/largefile/Makefile.am +++ b/test/largefile/Makefile.am @@ -29,7 +29,9 @@ TESTPROGRAMS = large_files \ large_dims_vars_attrs \ high_dim_var \ tst_cdf5_begin \ - large_coalesce + large_coalesce \ + large_header \ + large_reqs if HAS_FORTRAN TESTPROGRAMS += bigrecords diff --git a/test/largefile/large_coalesce.c b/test/largefile/large_coalesce.c index 8c80ed292..a1a95ff3f 100644 --- a/test/largefile/large_coalesce.c +++ b/test/largefile/large_coalesce.c @@ -35,7 +35,6 @@ int main(int argc, char** argv) MPI_Offset start[2], count[2]; MPI_Info info; size_t i; - int bb_enabled=0; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -69,11 +68,8 @@ int main(int argc, char** argv) MPI_Info_set(info, "romio_cb_write", "enable"); MPI_Info_set(info, "romio_ds_read", "disable"); /* run slow without it */ -#if defined(ENABLE_LARGE_SINGLE_REQ) || defined(ENABLE_BURST_BUFFER) -#else /* silence iternal debug messages */ setenv("PNETCDF_SAFE_MODE", "0", 1); -#endif #ifdef ENABLE_NETCDF4 /* Test for NetCDF 4 first as ncvalidator checks only read classic files */ @@ -153,18 +149,6 @@ int main(int argc, char** argv) CHECK_ERR MPI_Info_free(&info); - { - int flag; - char 
hint[MPI_MAX_INFO_VAL]; - MPI_Info infoused; - - ncmpi_inq_file_info(ncid, &infoused); - MPI_Info_get(infoused, "nc_burst_buf", MPI_MAX_INFO_VAL - 1, hint, &flag); - if (flag && strcasecmp(hint, "enable") == 0) - bb_enabled = 1; - MPI_Info_free(&infoused); - } - /* define dimensions */ err = ncmpi_def_dim(ncid, "NPROCS", nprocs, &dimid[0]); CHECK_ERR @@ -181,16 +165,8 @@ int main(int argc, char** argv) CHECK_ERR /* now we are in data mode */ -#if defined(ENABLE_LARGE_SINGLE_REQ) || defined(ENABLE_BURST_BUFFER) -#ifndef ENABLE_LARGE_SINGLE_REQ - if (bb_enabled) { -#endif for (i=0; i<20; i++) buf[ONE_G-10+i] = 'a'+i; for (i=0; i<20; i++) buf[TWO_G-10+i] = 'A'+i; -#ifndef ENABLE_LARGE_SINGLE_REQ - } -#endif -#endif start[0] = rank; count[0] = 1; @@ -215,10 +191,6 @@ int main(int argc, char** argv) CHECK_ERR err = ncmpi_wait_all(ncid, 3, req, st); -#if defined(ENABLE_LARGE_SINGLE_REQ) || defined(ENABLE_BURST_BUFFER) -#ifndef ENABLE_LARGE_SINGLE_REQ - if (bb_enabled) { -#endif CHECK_ERR /* read back to check contents */ @@ -249,14 +221,6 @@ int main(int argc, char** argv) /* test the same pattern but for iget */ for (i=0; i Date: Wed, 13 Mar 2024 12:49:34 -0500 Subject: [PATCH 7/9] add 2 new tests for large requests --- test/largefile/large_header.c | 121 ++++++++++++ test/largefile/large_reqs.c | 341 ++++++++++++++++++++++++++++++++++ 2 files changed, 462 insertions(+) create mode 100644 test/largefile/large_header.c create mode 100644 test/largefile/large_reqs.c diff --git a/test/largefile/large_header.c b/test/largefile/large_header.c new file mode 100644 index 000000000..c6c2f7f0e --- /dev/null +++ b/test/largefile/large_header.c @@ -0,0 +1,121 @@ +/********************************************************************* + * + * Copyright (C) 2024, Northwestern University and Argonne National Laboratory + * See COPYRIGHT notice in top-level directory. 
+ * + *********************************************************************/ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * This program is to test + * + * large header size, i.e. > INT_MAX, i.e. 2 GiB + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include /* strcpy() */ +#include /* basename() */ +#include /* INT_MAX */ +#include +#include +#include + +int main(int argc, char** argv) +{ + char filename[256]; + int rank, nprocs, err, nerrs=0; + int ncid, cmode, dimid, varid, buf; + MPI_Offset extent, start; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + /* get command-line arguments */ + if (argc > 2) { + if (!rank) printf("Usage: %s [filename]\n",argv[0]); + MPI_Finalize(); + return 1; + } + if (argc == 2) snprintf(filename, 256, "%s", argv[1]); + else strcpy(filename, "testfile.nc"); + MPI_Bcast(filename, 256, MPI_CHAR, 0, MPI_COMM_WORLD); + + if (rank == 0) { + char *cmd_str = (char*)malloc(strlen(argv[0]) + 256); + sprintf(cmd_str, "*** TESTING C %s for large header ", basename(argv[0])); + printf("%-66s ------ ", cmd_str); fflush(stdout); + free(cmd_str); + } + + /* create a new file for writing ----------------------------------------*/ + cmode = NC_CLOBBER | NC_64BIT_DATA; + err = ncmpi_create(MPI_COMM_WORLD, filename, cmode, MPI_INFO_NULL, &ncid); + CHECK_ERR + + /* define a dimension of size = nprocs */ + err = ncmpi_def_dim(ncid, "dim", nprocs, &dimid); + CHECK_ERR + + /* define a variable */ + err = ncmpi_def_var(ncid, "var", NC_INT, 1, &dimid, &varid); + CHECK_ERR + + /* make file header extent > 2 GiB */ + extent = (MPI_Offset)INT_MAX + 1024; + err = ncmpi__enddef(ncid, 0, extent, 0, 0); + CHECK_ERR + + /* write to the variable */ + start = rank; + buf = rank; + err = ncmpi_put_var1_int_all(ncid, varid, &start, &buf); + CHECK_ERR + + err = ncmpi_close(ncid); + CHECK_ERR + + err =
ncmpi_open(MPI_COMM_WORLD, filename, NC_NOWRITE, MPI_INFO_NULL, + &ncid); CHECK_ERR + + /* inquire ID of the variable */ + err = ncmpi_inq_varid(ncid, "var", &varid); + CHECK_ERR + + /* read from the variable */ + buf = -1; + err = ncmpi_get_var1_int_all(ncid, varid, &start, &buf); + CHECK_ERR + + if (buf != rank) { + nerrs++; + printf("Error at line %d in %s: expecting read buf %d but got %d\n", + __LINE__,__FILE__,rank,buf); + } + + err = ncmpi_close(ncid); CHECK_ERR + + /* check if PnetCDF freed all internal malloc */ + MPI_Offset malloc_size, sum_size; + err = ncmpi_inq_malloc_size(&malloc_size); + if (err == NC_NOERR) { + MPI_Reduce(&malloc_size, &sum_size, 1, MPI_OFFSET, MPI_SUM, 0, MPI_COMM_WORLD); + if (rank == 0 && sum_size > 0) { + printf("heap memory allocated by PnetCDF internally has %lld bytes yet to be freed\n", + sum_size); + ncmpi_inq_malloc_list(); + } + } + + MPI_Allreduce(MPI_IN_PLACE, &nerrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if (rank == 0) { + if (nerrs) printf(FAIL_STR,nerrs); + else printf(PASS_STR); + } + + MPI_Finalize(); + return (nerrs > 0); +} + diff --git a/test/largefile/large_reqs.c b/test/largefile/large_reqs.c new file mode 100644 index 000000000..5a32d6227 --- /dev/null +++ b/test/largefile/large_reqs.c @@ -0,0 +1,341 @@ +/********************************************************************* + * + * Copyright (C) 2024, Northwestern University and Argonne National Laboratory + * See COPYRIGHT notice in top-level directory. + * + *********************************************************************/ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * This program is to test writing and reading > 4GB in a single call to + * MPI_File_write. The user buffer is of size > 4GB per MPI rank. + * + * Two tests are included: + * 1. writing/reading one large variable (> 4GB) + * 2. writing/reading multiple smaller variables of total size > 4GB.
+ * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include /* strcpy() */ +#include /* basename() */ +#include +#include +#include + +#define NY 1280 +#define NX 1048576 + +static int verbose; + +static +int tst_one_var(char *filename, MPI_Comm comm) +{ + size_t i, buf_len; + int rank, nprocs, err, nerrs=0, ncid, cmode, varid, dimid[3], psize[2]; + int *buf; + MPI_Offset start[3], count[3]; + + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &rank); + + /* Creates a division of processors in a cartesian grid */ + psize[0] = psize[1] = 0; + MPI_Dims_create(nprocs, 2, psize); + + /* Test classic CDF-5 format */ + /* create a new file for writing ----------------------------------------*/ + cmode = NC_CLOBBER | NC_64BIT_DATA; + err = ncmpi_create(comm, filename, cmode, MPI_INFO_NULL, &ncid); + CHECK_ERR + + /* define dimensions Z, Y, and X */ + err = ncmpi_def_dim(ncid, "time", NC_UNLIMITED, &dimid[0]); CHECK_ERR + err = ncmpi_def_dim(ncid, "Y", NY*psize[0], &dimid[1]); CHECK_ERR + err = ncmpi_def_dim(ncid, "X", NX*psize[1], &dimid[2]); CHECK_ERR + + /* define a big 2D fixed-size variable of integer type */ + err = ncmpi_def_var(ncid, "var", NC_INT, 3, dimid, &varid); CHECK_ERR + + /* do not forget to exit define mode */ + err = ncmpi_enddef(ncid); CHECK_ERR + + /* now we are in data mode */ + start[0] = 0; + start[1] = NY * (rank / psize[1]); + start[2] = NX * (rank % psize[1]); + count[0] = 1; + + if (verbose) { + fflush(stdout); + MPI_Barrier(comm); + if (rank == 0) { + float len = (float)NY*psize[0]*NX*psize[1]*sizeof(int); + printf("\nglobal array is of size %d x %d = %.1f GiB\n", + NY*psize[0], NX*psize[1], len/1073741824); + } + printf("rank %d start=%lld %lld\n", rank, start[1],start[2]); + } + + /* user buffer is contiguous */ + buf_len = (size_t)NY * NX; + buf = (int*) malloc(buf_len * sizeof(int)); + for (i=0; i 2) { + if (!rank) printf("Usage: %s [filename]\n",argv[0]); + MPI_Finalize(); + 
return 1; + } + if (argc == 2) snprintf(filename, 256, "%s", argv[1]); + else strcpy(filename, "testfile.nc"); + MPI_Bcast(filename, 256, MPI_CHAR, 0, MPI_COMM_WORLD); + + if (rank == 0) { + char *cmd_str = (char*)malloc(strlen(argv[0]) + 256); + sprintf(cmd_str, "*** TESTING C %s for large requests ", basename(argv[0])); + printf("%-66s ------ ", cmd_str); fflush(stdout); + free(cmd_str); + } + + color = 1; + + if (nprocs > 2) { + /* run on 2 ranks only, as this test allocates memory > 4GB per rank */ + /* split MPI_COMM_WORLD based on 'color' and use the same rank order */ + color = (rank < 2) ? 1 : 0; + MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); + } + else + comm = MPI_COMM_WORLD; + + if (color) { + /* test one big variable */ + nerrs += tst_one_var(filename, comm); + + /* test a large number of smaller variables */ + nerrs += tst_vars(filename, comm); + } + + if (comm != MPI_COMM_WORLD) MPI_Comm_free(&comm); + + /* check if PnetCDF freed all internal malloc */ + MPI_Offset malloc_size, sum_size; + err = ncmpi_inq_malloc_size(&malloc_size); + if (err == NC_NOERR) { + MPI_Reduce(&malloc_size, &sum_size, 1, MPI_OFFSET, MPI_SUM, 0, MPI_COMM_WORLD); + if (rank == 0 && sum_size > 0) + printf("heap memory allocated by PnetCDF internally has %lld bytes yet to be freed\n", + sum_size); + } + + MPI_Allreduce(MPI_IN_PLACE, &nerrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if (rank == 0) { + if (nerrs) printf(FAIL_STR,nerrs); + else printf(PASS_STR); + } + + MPI_Finalize(); + return (nerrs > 0); +} + From 20174543ad517320229944058fb2d1e03a90075f Mon Sep 17 00:00:00 2001 From: wkliao Date: Wed, 13 Mar 2024 19:52:42 -0500 Subject: [PATCH 8/9] Check availability individual large count MPI functions PnetCDF uses the following MPI datatype construction functions for large requests. 
MPI_Type_create_subarray_c MPI_Type_contiguous_c MPI_Type_create_hvector_c MPI_Type_create_struct_c MPI_Type_create_hindexed_c MPI_Type_vector_c MPI_Pack_c MPI_Unpack_c MPI_Type_get_true_extent_x MPI_Type_size_x If all are available, then define HAVE_MPI_LARGE_COUNT and proceed with constructing large datatypes for fileview and user buffer. Checking MPI_VERSION against 3 is not sufficient, as some MPI libraries (e.g. OpenMPI 5.0.2) do not support MPI_XXX_c functions. --- configure.ac | 21 ++++++++++++ src/drivers/common/create_imaptype.c | 4 +-- src/drivers/common/pack_unpack.c | 24 ++++++++------ src/drivers/ncbbio/ncbbio_log.c | 4 +-- src/drivers/ncbbio/ncbbio_var.c | 2 +- src/drivers/ncmpio/ncmpio_file_io.c | 48 +++++++++++++++++++++------- src/drivers/ncmpio/ncmpio_filetype.c | 38 ++++++++++++++-------- src/drivers/ncmpio/ncmpio_fill.c | 16 +++++----- src/drivers/ncmpio/ncmpio_subfile.c | 2 +- src/drivers/ncmpio/ncmpio_util.c | 8 ++--- src/drivers/ncmpio/ncmpio_vard.c | 24 +++++++------- src/drivers/ncmpio/ncmpio_wait.c | 47 +++++++++++++-------------- test/largefile/large_header.c | 9 ++++-- 13 files changed, 156 insertions(+), 91 deletions(-) diff --git a/configure.ac b/configure.ac index be0d823a3..d2c4409ed 100644 --- a/configure.ac +++ b/configure.ac @@ -145,6 +145,7 @@ AH_TEMPLATE([PNETCDF_PROFILING], [Define if to enable PnetCDF internal pe AH_TEMPLATE([ENABLE_THREAD_SAFE], [Define if to enable thread-safe capability]) AH_TEMPLATE([ENABLE_REQ_AGGREGATION], [Define if able to support request aggregation in nonblocking routines]) dnl AH_TEMPLATE([HAVE_MPI_COUNT], [Define if type MPI_Count is defined]) +AH_TEMPLATE([HAVE_MPI_LARGE_COUNT], [Define if required MPI APIs have arguments of type MPI_Count]) AH_TOP([#ifndef _CONFIG_H #define _CONFIG_H]) @@ -1314,6 +1315,26 @@ dnl MPI_Type_create_struct \ dnl MPI_Type_create_resized \ dnl MPI_Type_get_extent]) +dnl MPI_Count was first introduced in MPI 3.0. Check MPI functions that make +dnl use of MPI_Count.
+have_mpi_large_count_apis=yes +AC_CHECK_FUNCS([MPI_Type_create_subarray_c \ + MPI_Type_contiguous_c \ + MPI_Type_create_hvector_c \ + MPI_Type_create_struct_c \ + MPI_Type_create_hindexed_c \ + MPI_Type_vector_c \ + MPI_Pack_c \ + MPI_Unpack_c \ + MPI_Type_get_true_extent_x \ + MPI_Type_size_x], [], [have_mpi_large_count_apis=no]) +# If one of the above APIs is not available, have_mpi_large_count_apis will be +# set to no +UD_MSG_DEBUG([have_mpi_large_count_apis=$have_mpi_large_count_apis]) +if test "x$have_mpi_large_count_apis" = "xyes" ; then + AC_DEFINE(HAVE_MPI_LARGE_COUNT, 1) +fi + dnl Check presence of MPI COMBINERS. These are of type int. dnl These are introduced in MPI 2.0. As PnetCDF requires an MPI library that dnl supports MPI-IO and MPI-IO was first introduced in MPI 2.0, checking these diff --git a/src/drivers/common/create_imaptype.c b/src/drivers/common/create_imaptype.c index 03169d402..5f219702a 100644 --- a/src/drivers/common/create_imaptype.c +++ b/src/drivers/common/create_imaptype.c @@ -69,7 +69,7 @@ ncmpii_create_imaptype(int ndims, */ if (imap_contig_blocklen > NC_MAX_INT || count[dim] > NC_MAX_INT || imap[dim] > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_vector_c(count[dim], imap_contig_blocklen, imap[dim], itype, imaptype); if (mpireturn != MPI_SUCCESS) { @@ -99,7 +99,7 @@ ncmpii_create_imaptype(int ndims, MPI_Datatype tmptype; if (count[dim] > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_hvector_c(count[dim], 1, imap[dim]*el_size, *imaptype, &tmptype); if (mpireturn != MPI_SUCCESS) { diff --git a/src/drivers/common/pack_unpack.c b/src/drivers/common/pack_unpack.c index b6c502184..053eb3c13 100644 --- a/src/drivers/common/pack_unpack.c +++ b/src/drivers/common/pack_unpack.c @@ -42,20 +42,22 @@ ncmpii_pack(int ndims, MPI_Offset buf_size, nelems; MPI_Datatype etype, imaptype=MPI_DATATYPE_NULL; -#if MPI_VERSION >= 3 - MPI_Count position, type_size; - 
mpireturn = MPI_Type_size_c(buftype, &type_size); +#ifdef HAVE_MPI_TYPE_SIZE_X + MPI_Count type_size; + mpireturn = MPI_Type_size_x(buftype, &type_size); if (mpireturn != MPI_SUCCESS) { - err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_c"); + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_x"); DEBUG_RETURN_ERROR(err) } #else - int position, type_size; + int type_size; mpireturn = MPI_Type_size(buftype, &type_size); if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size"); DEBUG_RETURN_ERROR(err) } + else if (type_size == MPI_UNDEFINED) + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) #endif buf_size = type_size; @@ -107,11 +109,12 @@ ncmpii_pack(int ndims, /* allocate lbuf and pack buf into lbuf */ lbuf = NCI_Malloc((size_t)buf_size); if (lbuf == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM) - position = 0; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT + MPI_Count position=0; MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, lbuf, (MPI_Count)buf_size, &position, MPI_COMM_SELF); #else + int position=0; if (buf_size > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) @@ -128,13 +131,14 @@ ncmpii_pack(int ndims, /* Step 2: pack lbuf to cbuf if imap is non-contiguous */ if (imaptype != MPI_DATATYPE_NULL) { /* true varm */ /* pack lbuf to cbuf, a contiguous buffer, using imaptype */ - position = 0; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT + MPI_Count position=0; *cbuf = NCI_Malloc((size_t)buf_size); MPI_Pack_c(lbuf, 1, imaptype, *cbuf, (MPI_Count)buf_size, &position, MPI_COMM_SELF); #else - if (buf_size > NC_MAX_INT) { + int position=0; + if (buf_size > NC_MAX_INT) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) *cbuf = NCI_Malloc((size_t)buf_size); diff --git a/src/drivers/ncbbio/ncbbio_log.c b/src/drivers/ncbbio/ncbbio_log.c index 4bb4e2178..27de3fbd5 100644 --- a/src/drivers/ncbbio/ncbbio_log.c +++ b/src/drivers/ncbbio/ncbbio_log.c @@ -362,7 +362,7 @@ int ncbbio_log_enddef(NC_bb *ncbbp) int ncbbio_log_close(NC_bb *ncbbp, int replay) { - int err; + int 
err, status=NC_NOERR; NC_bb_metadataheader* headerp; #ifdef PNETCDF_PROFILING @@ -484,7 +484,7 @@ int ncbbio_log_close(NC_bb *ncbbp, #endif #endif - return NC_NOERR; + return status; } /* diff --git a/src/drivers/ncbbio/ncbbio_var.c b/src/drivers/ncbbio/ncbbio_var.c index 9bce692a8..74e3dd274 100644 --- a/src/drivers/ncbbio/ncbbio_var.c +++ b/src/drivers/ncbbio/ncbbio_var.c @@ -417,7 +417,7 @@ ncbbio_put_varn(void *ncdp, bnelems *= elsize; if (bnelems > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position=0; cbuf = NCI_Malloc(bnelems); mpireturn = MPI_Pack_c((void*)buf, bufcount, buftype, cbuf, diff --git a/src/drivers/ncmpio/ncmpio_file_io.c b/src/drivers/ncmpio/ncmpio_file_io.c index d17dfb023..711d91554 100644 --- a/src/drivers/ncmpio/ncmpio_file_io.c +++ b/src/drivers/ncmpio/ncmpio_file_io.c @@ -28,18 +28,36 @@ ncmpio_read_write(NC *ncp, void *buf, int buftype_is_contig) { - int status=NC_NOERR, mpireturn, err; + int status=NC_NOERR, err=NC_NOERR, mpireturn; MPI_Status mpistatus; MPI_File fh; MPI_Offset req_size; -#if MPI_VERSION >= 3 + +#ifdef HAVE_MPI_TYPE_SIZE_X MPI_Count btype_size; /* MPI_Type_size_x is introduced in MPI 3.0 */ - MPI_Type_size_x(buf_type, &btype_size); + mpireturn = MPI_Type_size_x(buf_type, &btype_size); #else int btype_size; - MPI_Type_size(buf_type, &btype_size); + mpireturn = MPI_Type_size(buf_type, &btype_size); #endif + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size"); + /* return the first encountered error if there is any */ + err = (err == NC_EFILE) ? 
NC_EREAD : err; + } + else if (btype_size == MPI_UNDEFINED) + err = NC_EINTOVERFLOW; + + if (err != NC_NOERR) { + if (coll_indep == NC_REQ_COLL) { + DEBUG_ASSIGN_ERROR(status, err) + /* write nothing, but participate the collective call */ + buf_count = 0; + } + else + DEBUG_RETURN_ERROR(err) + } /* request size in bytes, may be > NC_MAX_INT */ req_size = (MPI_Offset)btype_size * buf_count; @@ -64,12 +82,15 @@ ncmpio_read_write(NC *ncp, MPI_Datatype xbuf_type=buf_type; if (buf_count > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type); MPI_Type_commit(&xbuf_type); xlen = 1; #else - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + if (coll_indep == NC_REQ_COLL) + DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) + else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) #endif } else if (buf_count > 0 && !buftype_is_contig && @@ -110,7 +131,7 @@ ncmpio_read_write(NC *ncp, /* return the first encountered error if there is any */ if (status == NC_NOERR) { err = (err == NC_EFILE) ? NC_EREAD : err; - DEBUG_ASSIGN_ERROR(status, err) + DEBUG_RETURN_ERROR(err) } } } @@ -125,7 +146,7 @@ ncmpio_read_write(NC *ncp, #endif } if (xbuf != buf) { /* unpack contiguous xbuf to noncontiguous buf */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count pos=0; MPI_Unpack_c(xbuf, xlen, &pos, buf, (MPI_Count)buf_count, buf_type, MPI_COMM_SELF); @@ -144,12 +165,15 @@ ncmpio_read_write(NC *ncp, MPI_Datatype xbuf_type=buf_type; if (buf_count > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type); MPI_Type_commit(&xbuf_type); xlen = 1; #else - DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) + if (coll_indep == NC_REQ_COLL) + DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) + else + DEBUG_RETURN_ERROR(NC_EINTOVERFLOW) #endif } else if (buf_count > 0 && !buftype_is_contig && @@ -160,7 +184,7 @@ ncmpio_read_write(NC *ncp, * noncontiguous. 
*/ if (req_size > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count pos=0; xbuf = NCI_Malloc(req_size); MPI_Pack_c(buf, (MPI_Count)buf_count, buf_type, xbuf, @@ -203,7 +227,7 @@ ncmpio_read_write(NC *ncp, /* return the first encountered error if there is any */ if (status == NC_NOERR) { err = (err == NC_EFILE) ? NC_EWRITE : err; - DEBUG_ASSIGN_ERROR(status, err) + DEBUG_RETURN_ERROR(err) } } } diff --git a/src/drivers/ncmpio/ncmpio_filetype.c b/src/drivers/ncmpio/ncmpio_filetype.c index 4939bc28d..d2422da87 100644 --- a/src/drivers/ncmpio/ncmpio_filetype.c +++ b/src/drivers/ncmpio/ncmpio_filetype.c @@ -123,7 +123,7 @@ type_create_subarray64(int ndims, if (ndims == 0) DEBUG_RETURN_ERROR(NC_EDIMMETA) -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *sizes, *subsizes, *starts; sizes = (MPI_Count*) NCI_Malloc((size_t)ndims * 3 * sizeof(MPI_Count)); @@ -329,7 +329,7 @@ filetype_create_vara(const NC *ncp, /* previously, request size has been checked and it must > 0 */ if (IS_RECVAR(varp)) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count blocklength; #else int blocklength; @@ -358,7 +358,7 @@ filetype_create_vara(const NC *ncp, blocklength = varp->xsz; } -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT /* concatenate number of count[0] subarray types into filetype */ err = MPI_Type_create_hvector_c(count[0], blocklength, ncp->recsize, rectype, &filetype); @@ -576,8 +576,8 @@ ncmpio_file_set_view(const NC *ncp, MPI_Comm_rank(ncp->comm, &rank); if (rank == 0) { /* prepend the whole file header to filetype */ - MPI_Datatype root_filetype, ftypes[2]; -#if MPI_VERSION >= 3 + MPI_Datatype root_filetype=MPI_BYTE, ftypes[2]; +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count blocklens[2]; MPI_Count disps[2]; #else @@ -585,8 +585,10 @@ ncmpio_file_set_view(const NC *ncp, MPI_Aint disps[2]; /* check if header size > 2^31 */ - if (ncp->begin_var > NC_MAX_INT) - DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW) + if (ncp->begin_var > NC_MAX_INT) { + 
status = NC_EINTOVERFLOW; + goto err_out; + } #endif /* first block is the header extent */ @@ -599,23 +601,31 @@ ncmpio_file_set_view(const NC *ncp, disps[1] = *offset; ftypes[1] = filetype; -#if (MPI_VERSION < 3) && (SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET) +#if !defined(HAVE_MPI_LARGE_COUNT) && (SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET) if (*offset > NC_MAX_INT) { - blocklens[1] = 0; - DEBUG_ASSIGN_ERROR(status, NC_EAINT_TOO_SMALL) + status = NC_EINTOVERFLOW; + goto err_out; } #endif -#if MPI_VERSION >= 3 - MPI_Type_create_struct_c(2, blocklens, disps, ftypes, &root_filetype); +#ifdef HAVE_MPI_LARGE_COUNT + mpireturn = MPI_Type_create_struct_c(2, blocklens, disps, ftypes, + &root_filetype); #else - MPI_Type_create_struct(2, blocklens, disps, ftypes, &root_filetype); + mpireturn = MPI_Type_create_struct(2, blocklens, disps, ftypes, + &root_filetype); #endif + if (mpireturn != MPI_SUCCESS) { + err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_struct"); + if (status == NC_NOERR) status = err; + } MPI_Type_commit(&root_filetype); +err_out: TRACE_IO(MPI_File_set_view)(fh, 0, MPI_BYTE, root_filetype, "native", MPI_INFO_NULL); - MPI_Type_free(&root_filetype); + if (root_filetype != MPI_BYTE) + MPI_Type_free(&root_filetype); /* now update the explicit offset to be used in MPI-IO call later */ *offset = ncp->begin_var; diff --git a/src/drivers/ncmpio/ncmpio_fill.c b/src/drivers/ncmpio/ncmpio_fill.c index 49fbc902b..437070dd8 100644 --- a/src/drivers/ncmpio/ncmpio_fill.c +++ b/src/drivers/ncmpio/ncmpio_fill.c @@ -211,7 +211,7 @@ fill_var_rec(NC *ncp, bufType = MPI_BYTE; if (count > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_contiguous_c((MPI_Count)count, MPI_BYTE, &bufType); if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c"); @@ -374,7 +374,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) MPI_Status mpistatus; NC_var *varp; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count 
*blocklengths, *offset; #else int *blocklengths; @@ -423,7 +423,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) /* find the number of write segments (upper bound) */ nsegs = (size_t)(ncp->vars.ndefined + ncp->vars.num_rec_vars * nrecs); count = (MPI_Offset*) NCI_Malloc(nsegs * SIZEOF_MPI_OFFSET); -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT offset = (MPI_Count*) NCI_Malloc(nsegs * sizeof(MPI_Count)); #else offset = (MPI_Aint*) NCI_Malloc(nsegs * SIZEOF_MPI_AINT); @@ -515,7 +515,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } /* allocate one contiguous buffer space for all writes */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklengths = (MPI_Count*) NCI_Malloc((size_t)j * sizeof(MPI_Count)); #else blocklengths = (int*) NCI_Malloc((size_t)j * SIZEOF_INT); @@ -543,7 +543,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } count[k] *= varp->xsz; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklengths[k] = (MPI_Count)count[k]; #else if (count[k] != (int)count[k]) { @@ -578,7 +578,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } count[k] *= varp->xsz; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklengths[k] = (MPI_Count)count[k]; #else if (count[k] != (int)count[k]) { @@ -600,7 +600,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) } else { /* create fileview: a list of contiguous segment for each variable */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_hindexed_c(k, blocklengths, offset, MPI_BYTE, &filetype); #else @@ -628,7 +628,7 @@ fillerup_aggregate(NC *ncp, NC *old_ncp) bufType = MPI_BYTE; if (buf_len > NC_MAX_INT) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_contiguous_c((MPI_Count)buf_len, MPI_BYTE, &bufType); if (mpireturn != MPI_SUCCESS) { diff --git a/src/drivers/ncmpio/ncmpio_subfile.c b/src/drivers/ncmpio/ncmpio_subfile.c index 0586c9a42..eef114dbd 100644 --- a/src/drivers/ncmpio/ncmpio_subfile.c +++ b/src/drivers/ncmpio/ncmpio_subfile.c @@ -785,7 +785,7 @@ 
ncmpio_subfile_getput_vars(NC *ncp, MPI_Offset outsize = bnelems * bufcount * el_size; cbuf = NCI_Malloc((size_t)outsize); if (fIsSet(reqMode, NC_REQ_WR)) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position=0; MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, cbuf, (MPI_Count)outsize, &position, MPI_COMM_SELF); diff --git a/src/drivers/ncmpio/ncmpio_util.c b/src/drivers/ncmpio/ncmpio_util.c index 6579c9425..3940feee7 100644 --- a/src/drivers/ncmpio/ncmpio_util.c +++ b/src/drivers/ncmpio/ncmpio_util.c @@ -402,7 +402,7 @@ ncmpio_pack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ if (buf != lbuf) { /* pack buf into lbuf based on buftype */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position = 0; MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, lbuf, (MPI_Count)ibuf_size, &position, MPI_COMM_SELF); @@ -437,7 +437,7 @@ ncmpio_pack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ } /* pack lbuf to cbuf based on imaptype */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position = 0; MPI_Pack_c(lbuf, 1, imaptype, cbuf, (MPI_Count)ibuf_size, &position, MPI_COMM_SELF); @@ -670,7 +670,7 @@ ncmpio_unpack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. */ /* unpacked cbuf into lbuf based on imap -------------------------------*/ if (imaptype != MPI_DATATYPE_NULL) { /* unpack cbuf to lbuf based on imaptype */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position = 0; MPI_Unpack_c(cbuf, (MPI_Count)ibuf_size, &position, lbuf, 1, imaptype, MPI_COMM_SELF); @@ -687,7 +687,7 @@ ncmpio_unpack_xbuf(int fmt, /* NC_FORMAT_CDF2 NC_FORMAT_CDF5 etc. 
*/ /* unpacked lbuf into buf based on buftype -----------------------------*/ if (!buftype_is_contig && lbuf != buf) { /* no need unpack when buftype is used in MPI_File_read (lbuf == buf) */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count position = 0; MPI_Unpack_c(lbuf, (MPI_Count)ibuf_size, &position, buf, (MPI_Count)bufcount, buftype, MPI_COMM_SELF); diff --git a/src/drivers/ncmpio/ncmpio_vard.c b/src/drivers/ncmpio/ncmpio_vard.c index be958ee59..5fab2b27f 100644 --- a/src/drivers/ncmpio/ncmpio_vard.c +++ b/src/drivers/ncmpio/ncmpio_vard.c @@ -59,13 +59,7 @@ getput_vard(NC *ncp, MPI_File fh; MPI_Offset nelems=0, fnelems=0, bnelems=0, offset=0; MPI_Datatype etype=MPI_DATATYPE_NULL, xtype=MPI_BYTE; -#if MPI_VERSION >= 3 - MPI_Count filetype_size=0; - MPI_Count true_lb=0, true_ub=0, true_extent=0; -#else - int filetype_size=0; - MPI_Aint true_lb=0, true_ub=0, true_extent=0; -#endif + MPI_Offset filetype_size; #ifdef ENABLE_SUBFILING /* call a separate routine if variable is stored in subfiles */ @@ -90,14 +84,18 @@ getput_vard(NC *ncp, * MPI_Type_create_hindexed), we need to find the true last byte accessed * by this request, true_ub, in order to calculate new_numrecs. */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_TYPE_SIZE_X /* MPI_Type_size_x is introduced in MPI 3.0 */ - mpireturn = MPI_Type_size_x(filetype, &filetype_size); + MPI_Count true_lb=0, true_ub=0, true_extent=0; + MPI_Count type_size; + + mpireturn = MPI_Type_size_x(filetype, &type_size); if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_x"); xtype = MPI_BYTE; goto err_check; } + filetype_size = type_size; /* MPI_Type_get_true_extent_x is introduced in MPI 3.0 */ MPI_Type_get_true_extent_x(filetype, &true_lb, &true_extent); true_ub = true_lb + true_extent; @@ -106,16 +104,20 @@ getput_vard(NC *ncp, * cannot be used for large filetypes. Prior to MPI 3.0 standard, argument * "size" of MPI_Type_size is of type int. 
When int overflows, the returned * value in argument "size" may be a negative. */ - mpireturn = MPI_Type_size(filetype, &filetype_size); + MPI_Aint true_lb=0, true_ub=0, true_extent=0; + int type_size; + + mpireturn = MPI_Type_size(filetype, &type_size); if (mpireturn != MPI_SUCCESS) { err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size"); xtype = MPI_BYTE; goto err_check; } - if (filetype_size < 0) { /* int overflow */ + if (type_size == MPI_UNDEFINED) { /* int overflow */ DEBUG_ASSIGN_ERROR(err, NC_EINTOVERFLOW) goto err_check; } + filetype_size = type_size; MPI_Type_get_true_extent(filetype, &true_lb, &true_extent); true_ub = true_lb + true_extent; #endif diff --git a/src/drivers/ncmpio/ncmpio_wait.c b/src/drivers/ncmpio/ncmpio_wait.c index d90c8d759..587c2f83b 100644 --- a/src/drivers/ncmpio/ncmpio_wait.c +++ b/src/drivers/ncmpio/ncmpio_wait.c @@ -327,7 +327,7 @@ static int construct_filetypes(NC *ncp, NC_lead_req *lead_list, /* NC_REQ_WR or NC_REQ_RD */ int num_reqs, -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *blocklens, /* [num_reqs] temp buffer */ MPI_Count *disps, /* [num_reqs] temp buffer */ #else @@ -413,7 +413,7 @@ construct_filetypes(NC *ncp, coalesced_len = blocklens[j]; if (last_contig_req >= 0) coalesced_len += blocklens[last_contig_req]; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT if (last_contig_req >= 0 && disps[j] - disps[last_contig_req] == blocklens[last_contig_req]) { @@ -458,7 +458,7 @@ construct_filetypes(NC *ncp, int mpireturn; if (all_ftype_contig) { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_hindexed_c(num_reqs, blocklens, disps, MPI_BYTE, filetype); #else @@ -473,7 +473,7 @@ construct_filetypes(NC *ncp, } } else { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_struct_c(num_reqs, blocklens, disps, ftypes, filetype); #else @@ -506,7 +506,7 @@ construct_filetypes(NC *ncp, static int construct_buffertypes(NC_lead_req *lead_list, int num_reqs, -#if 
MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *blocklens, /* [num_reqs] temp buffer */ MPI_Count *disps, /* [num_reqs] temp buffer */ #else @@ -540,7 +540,7 @@ construct_buffertypes(NC_lead_req *lead_list, for (k=1; kvarp->ndims; k++) req_size *= count[k]; } -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklens[j] = req_size; #else /* check int overflow */ @@ -562,7 +562,7 @@ construct_buffertypes(NC_lead_req *lead_list, if (num_reqs > 0) { /* concatenate buffer addresses into a single buffer type */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_hindexed_c(num_reqs, blocklens, disps, MPI_BYTE, buf_type); #else @@ -1487,7 +1487,7 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ { int i, j, mpireturn; MPI_Offset next_off, next_len, true_nsegs; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *blocklens; MPI_Count *disps; #else @@ -1519,7 +1519,7 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ } /* j+1 is the coalesced length */ true_nsegs = j + 1; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * true_nsegs); disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * true_nsegs); @@ -1601,7 +1601,7 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ } /* j+1 is the coalesced length */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT if (true_nsegs < j + 1) { blocklens = (MPI_Count*) NCI_Realloc(blocklens, (j+1) * sizeof(MPI_Count)); disps = (MPI_Count*) NCI_Realloc(disps, (j+1) * sizeof(MPI_Count)); @@ -1620,6 +1620,10 @@ type_create_off_len(MPI_Offset nsegs, /* no. off-len pairs */ blocklens[j] = segs[i].len; } } + /* j+1 is the coalesced length */ + + mpireturn = MPI_Type_create_hindexed_c(j+1, blocklens, disps, MPI_BYTE, + buf_type); #else if (true_nsegs < j + 1) { blocklens = (int*) NCI_Realloc(blocklens, (j+1) * SIZEOF_INT); @@ -1649,13 +1653,8 @@ type_create_off_len(MPI_Offset nsegs, /* no. 
off-len pairs */ blocklens[j] = (int)segs[i].len; } } -#endif /* j+1 is the coalesced length */ -#if MPI_VERSION >= 3 - mpireturn = MPI_Type_create_hindexed_c(j+1, blocklens, disps, MPI_BYTE, - buf_type); -#else mpireturn = MPI_Type_create_hindexed(j+1, blocklens, disps, MPI_BYTE, buf_type); #endif @@ -1696,7 +1695,7 @@ req_aggregation(NC *ncp, { int i, gtype, err, status=NC_NOERR, ngroups, mpireturn, buf_len; int *group_index, *group_type, numLeadReqs; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *blocklens, *f_blocklens, *b_blocklens; MPI_Count *disps, *f_disps, *b_disps; #else @@ -1827,7 +1826,7 @@ req_aggregation(NC *ncp, btypes = ftypes + ngroups; /* temp buffers, used by multiple calls to construct_filetypes() */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); disps = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); f_blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * ngroups); @@ -1953,7 +1952,7 @@ req_aggregation(NC *ncp, } else { /* concatenate all ftypes[] to filetype */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_struct_c(ngroups, f_blocklens, f_disps, ftypes, &filetype); #else @@ -1976,7 +1975,7 @@ req_aggregation(NC *ncp, } /* concatenate all btypes[] to buf_type */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_struct_c(ngroups, b_blocklens, b_disps, btypes, &buf_type); #else @@ -2260,7 +2259,7 @@ mgetput(NC *ncp, MPI_Offset offset=0, buf_count=0; MPI_File fh; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT MPI_Count *blocklens; MPI_Count *disps; blocklens = (MPI_Count*) NCI_Malloc(sizeof(MPI_Count) * num_reqs); @@ -2302,7 +2301,7 @@ mgetput(NC *ncp, if (fIsSet(lead->flag, NC_REQ_SKIP)) buf_count = 0; else { -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT buf_count = reqs[0].nelems * lead->varp->xsz; #else MPI_Offset req_size = reqs[0].nelems * lead->varp->xsz; @@ -2333,7 +2332,7 @@ 
mgetput(NC *ncp, req_size = reqs[i].nelems * lead->varp->xsz; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT blocklens[j] = req_size; #else /* check int overflow */ @@ -2355,7 +2354,7 @@ mgetput(NC *ncp, req_size = blocklens[last_contig_req]; req_size += blocklens[j]; -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT if (ai - a_last_contig == blocklens[last_contig_req]) { /* user buffer of request j is contiguous from j-1 * we coalesce j to j-1 */ @@ -2392,7 +2391,7 @@ mgetput(NC *ncp, int num_contig_reqs = last_contig_req+1; /* concatenate buffer addresses into a single buffer type */ -#if MPI_VERSION >= 3 +#ifdef HAVE_MPI_LARGE_COUNT mpireturn = MPI_Type_create_hindexed_c(num_contig_reqs, blocklens, disps, MPI_BYTE, &buf_type); #else diff --git a/test/largefile/large_header.c b/test/largefile/large_header.c index c6c2f7f0e..65c20d6f9 100644 --- a/test/largefile/large_header.c +++ b/test/largefile/large_header.c @@ -60,7 +60,9 @@ int main(int argc, char** argv) CHECK_ERR /* define a variable */ - err = ncmpi_def_var(ncid, "var", NC_INT, 1, &dimid, &varid); + err = ncmpi_def_var(ncid, "var0", NC_INT, 1, &dimid, &varid); + CHECK_ERR + err = ncmpi_def_var(ncid, "var1", NC_INT, 1, &dimid, &varid); CHECK_ERR /* make file header extent > 4 GiB */ @@ -77,11 +79,13 @@ int main(int argc, char** argv) err = ncmpi_close(ncid); CHECK_ERR + if (err != NC_NOERR) goto err_out; + err = ncmpi_open(MPI_COMM_WORLD, filename, NC_NOWRITE, MPI_INFO_NULL, &ncid); CHECK_ERR /* inquire ID of the variable */ - err = ncmpi_inq_varid(ncid, "var", &varid); + err = ncmpi_inq_varid(ncid, "var1", &varid); CHECK_ERR /* read from the variable */ @@ -109,6 +113,7 @@ int main(int argc, char** argv) } } +err_out: MPI_Allreduce(MPI_IN_PLACE, &nerrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (rank == 0) { if (nerrs) printf(FAIL_STR,nerrs); From ca0c3f48809ebfaa25532bfd51f9b2e2755b3c34 Mon Sep 17 00:00:00 2001 From: wkliao Date: Thu, 14 Mar 2024 00:33:10 -0500 Subject: [PATCH 9/9] add note about PR 
#131 --- sneak_peek.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sneak_peek.md b/sneak_peek.md index 6bbfcf88e..976871548 100644 --- a/sneak_peek.md +++ b/sneak_peek.md @@ -15,9 +15,14 @@ This is essentially a placeholder for the next release note ... ncmpi_put_vara_float_all(ncid, varid, start, count, buf); ``` + PnetCDF now allows a single read/write request from a process of size - larger than 2 GiB. Large requests are passed down to the MP-IO library, as - many modern MPI-IO implementations can handle such large requests. This - feature thus deprecates the configure option `--enable-large-single-req`. + larger than 2 GiB. Such large requests are now passed down to the MPI-IO + library. This change is because MPI 3.0 introduces the large count feature, + including MPI_Count data type, MPI_XXX_c and MPI_XXX_x APIs that use 8-byte + integer type to enable large MPI operations. As many MPI libraries today + have implemented this feature, PnetCDF can now take advantage of it to + support large single requests. Because of this change, the configure option + `--enable-large-single-req` is now deprecated. + See [PR #131](https://github.com/Parallel-NetCDF/PnetCDF/pull/131) * New optimization + none