Skip to content

Commit

Permalink
Check availability of individual large-count MPI functions
Browse files Browse the repository at this point in the history
PnetCDF uses the following large-count MPI functions (datatype construction,
pack/unpack, and type size/extent queries) for large requests:
    MPI_Type_create_subarray_c
    MPI_Type_contiguous_c
    MPI_Type_create_hvector_c
    MPI_Type_create_struct_c
    MPI_Type_create_hindexed_c
    MPI_Type_vector_c
    MPI_Pack_c
    MPI_Unpack_c
    MPI_Type_get_true_extent_x
    MPI_Type_size_x
If all are available, then define HAVE_MPI_LARGE_COUNT and proceed with
constructing large datatypes for the fileview and the user buffer. Checking
MPI_VERSION against 3 is not sufficient, because the MPI_XXX_c functions were
only added in MPI 4.0 and some MPI implementations (e.g. OpenMPI 5.0.2) do
not support them.
  • Loading branch information
wkliao committed Mar 14, 2024
1 parent cc0d11b commit 2017454
Show file tree
Hide file tree
Showing 13 changed files with 156 additions and 91 deletions.
21 changes: 21 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ AH_TEMPLATE([PNETCDF_PROFILING], [Define if to enable PnetCDF internal pe
AH_TEMPLATE([ENABLE_THREAD_SAFE], [Define if to enable thread-safe capability])
AH_TEMPLATE([ENABLE_REQ_AGGREGATION], [Define if able to support request aggregation in nonblocking routines])
dnl AH_TEMPLATE([HAVE_MPI_COUNT], [Define if type MPI_Count is defined])
AH_TEMPLATE([HAVE_MPI_LARGE_COUNT], [Define if required MPI APIs have arguments of type MPI_Count])

AH_TOP([#ifndef _CONFIG_H
#define _CONFIG_H])
Expand Down Expand Up @@ -1314,6 +1315,26 @@ dnl MPI_Type_create_struct \
dnl MPI_Type_create_resized \
dnl MPI_Type_get_extent])

dnl MPI_Count was first introduced in MPI 3.0. Check for the MPI functions
dnl that make use of MPI_Count.
have_mpi_large_count_apis=yes
AC_CHECK_FUNCS([MPI_Type_create_subarray_c \
MPI_Type_contiguous_c \
MPI_Type_create_hvector_c \
MPI_Type_create_struct_c \
MPI_Type_create_hindexed_c \
MPI_Type_vector_c \
MPI_Pack_c \
MPI_Unpack_c \
MPI_Type_get_true_extent_x \
MPI_Type_size_x], [], [have_mpi_large_count_apis=no])
# If one of the above APIs is not available, have_mpi_large_count_apis will be
# set to no
UD_MSG_DEBUG([have_mpi_large_count_apis=$have_mpi_large_count_apis])
if test "x$have_mpi_large_count_apis" = "xyes" ; then
AC_DEFINE(HAVE_MPI_LARGE_COUNT, 1)
fi

dnl Check presence of MPI COMBINERS. These are of type int.
dnl These are introduced in MPI 2.0. As PnetCDF requires an MPI library that
dnl supports MPI-IO and MPI-IO was first introduced in MPI 2.0, checking these
Expand Down
4 changes: 2 additions & 2 deletions src/drivers/common/create_imaptype.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ ncmpii_create_imaptype(int ndims,
*/
if (imap_contig_blocklen > NC_MAX_INT || count[dim] > NC_MAX_INT ||
imap[dim] > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
mpireturn = MPI_Type_vector_c(count[dim], imap_contig_blocklen,
imap[dim], itype, imaptype);
if (mpireturn != MPI_SUCCESS) {
Expand Down Expand Up @@ -99,7 +99,7 @@ ncmpii_create_imaptype(int ndims,
MPI_Datatype tmptype;

if (count[dim] > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
mpireturn = MPI_Type_create_hvector_c(count[dim], 1,
imap[dim]*el_size, *imaptype, &tmptype);
if (mpireturn != MPI_SUCCESS) {
Expand Down
24 changes: 14 additions & 10 deletions src/drivers/common/pack_unpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,22 @@ ncmpii_pack(int ndims,
MPI_Offset buf_size, nelems;
MPI_Datatype etype, imaptype=MPI_DATATYPE_NULL;

#if MPI_VERSION >= 3
MPI_Count position, type_size;
mpireturn = MPI_Type_size_c(buftype, &type_size);
#ifdef HAVE_MPI_TYPE_SIZE_X
MPI_Count type_size;
mpireturn = MPI_Type_size_x(buftype, &type_size);
if (mpireturn != MPI_SUCCESS) {
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_c");
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size_x");
DEBUG_RETURN_ERROR(err)
}
#else
int position, type_size;
int type_size;
mpireturn = MPI_Type_size(buftype, &type_size);
if (mpireturn != MPI_SUCCESS) {
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size");
DEBUG_RETURN_ERROR(err)
}
else if (type_size == MPI_UNDEFINED)
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
#endif
buf_size = type_size;

Expand Down Expand Up @@ -107,11 +109,12 @@ ncmpii_pack(int ndims,
/* allocate lbuf and pack buf into lbuf */
lbuf = NCI_Malloc((size_t)buf_size);
if (lbuf == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM)
position = 0;
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count position=0;
MPI_Pack_c(buf, (MPI_Count)bufcount, buftype, lbuf,
(MPI_Count)buf_size, &position, MPI_COMM_SELF);
#else
int position=0;
if (buf_size > NC_MAX_INT)
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)

Expand All @@ -128,13 +131,14 @@ ncmpii_pack(int ndims,
/* Step 2: pack lbuf to cbuf if imap is non-contiguous */
if (imaptype != MPI_DATATYPE_NULL) { /* true varm */
/* pack lbuf to cbuf, a contiguous buffer, using imaptype */
position = 0;
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count position=0;
*cbuf = NCI_Malloc((size_t)buf_size);
MPI_Pack_c(lbuf, 1, imaptype, *cbuf, (MPI_Count)buf_size, &position,
MPI_COMM_SELF);
#else
if (buf_size > NC_MAX_INT) {
int position=0;
if (buf_size > NC_MAX_INT)
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)

*cbuf = NCI_Malloc((size_t)buf_size);
Expand Down
4 changes: 2 additions & 2 deletions src/drivers/ncbbio/ncbbio_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ int ncbbio_log_enddef(NC_bb *ncbbp)
int ncbbio_log_close(NC_bb *ncbbp,
int replay)
{
int err;
int err, status=NC_NOERR;
NC_bb_metadataheader* headerp;

#ifdef PNETCDF_PROFILING
Expand Down Expand Up @@ -484,7 +484,7 @@ int ncbbio_log_close(NC_bb *ncbbp,
#endif
#endif

return NC_NOERR;
return status;
}

/*
Expand Down
2 changes: 1 addition & 1 deletion src/drivers/ncbbio/ncbbio_var.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ ncbbio_put_varn(void *ncdp,
bnelems *= elsize;

if (bnelems > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count position=0;
cbuf = NCI_Malloc(bnelems);
mpireturn = MPI_Pack_c((void*)buf, bufcount, buftype, cbuf,
Expand Down
48 changes: 36 additions & 12 deletions src/drivers/ncmpio/ncmpio_file_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,36 @@ ncmpio_read_write(NC *ncp,
void *buf,
int buftype_is_contig)
{
int status=NC_NOERR, mpireturn, err;
int status=NC_NOERR, err=NC_NOERR, mpireturn;
MPI_Status mpistatus;
MPI_File fh;
MPI_Offset req_size;
#if MPI_VERSION >= 3

#ifdef HAVE_MPI_TYPE_SIZE_X
MPI_Count btype_size;
/* MPI_Type_size_x is introduced in MPI 3.0 */
MPI_Type_size_x(buf_type, &btype_size);
mpireturn = MPI_Type_size_x(buf_type, &btype_size);
#else
int btype_size;
MPI_Type_size(buf_type, &btype_size);
mpireturn = MPI_Type_size(buf_type, &btype_size);
#endif
if (mpireturn != MPI_SUCCESS) {
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size");
/* return the first encountered error if there is any */
err = (err == NC_EFILE) ? NC_EREAD : err;
}
else if (btype_size == MPI_UNDEFINED)
err = NC_EINTOVERFLOW;

if (err != NC_NOERR) {
if (coll_indep == NC_REQ_COLL) {
DEBUG_ASSIGN_ERROR(status, err)
/* write nothing, but participate the collective call */
buf_count = 0;
}
else
DEBUG_RETURN_ERROR(err)
}

/* request size in bytes, may be > NC_MAX_INT */
req_size = (MPI_Offset)btype_size * buf_count;
Expand All @@ -64,12 +82,15 @@ ncmpio_read_write(NC *ncp,
MPI_Datatype xbuf_type=buf_type;

if (buf_count > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type);
MPI_Type_commit(&xbuf_type);
xlen = 1;
#else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
if (coll_indep == NC_REQ_COLL)
DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW)
else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
#endif
}
else if (buf_count > 0 && !buftype_is_contig &&
Expand Down Expand Up @@ -110,7 +131,7 @@ ncmpio_read_write(NC *ncp,
/* return the first encountered error if there is any */
if (status == NC_NOERR) {
err = (err == NC_EFILE) ? NC_EREAD : err;
DEBUG_ASSIGN_ERROR(status, err)
DEBUG_RETURN_ERROR(err)
}
}
}
Expand All @@ -125,7 +146,7 @@ ncmpio_read_write(NC *ncp,
#endif
}
if (xbuf != buf) { /* unpack contiguous xbuf to noncontiguous buf */
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count pos=0;
MPI_Unpack_c(xbuf, xlen, &pos, buf, (MPI_Count)buf_count, buf_type,
MPI_COMM_SELF);
Expand All @@ -144,12 +165,15 @@ ncmpio_read_write(NC *ncp,
MPI_Datatype xbuf_type=buf_type;

if (buf_count > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type);
MPI_Type_commit(&xbuf_type);
xlen = 1;
#else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
if (coll_indep == NC_REQ_COLL)
DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW)
else
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
#endif
}
else if (buf_count > 0 && !buftype_is_contig &&
Expand All @@ -160,7 +184,7 @@ ncmpio_read_write(NC *ncp,
* noncontiguous.
*/
if (req_size > NC_MAX_INT) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count pos=0;
xbuf = NCI_Malloc(req_size);
MPI_Pack_c(buf, (MPI_Count)buf_count, buf_type, xbuf,
Expand Down Expand Up @@ -203,7 +227,7 @@ ncmpio_read_write(NC *ncp,
/* return the first encountered error if there is any */
if (status == NC_NOERR) {
err = (err == NC_EFILE) ? NC_EWRITE : err;
DEBUG_ASSIGN_ERROR(status, err)
DEBUG_RETURN_ERROR(err)
}
}
}
Expand Down
38 changes: 24 additions & 14 deletions src/drivers/ncmpio/ncmpio_filetype.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ type_create_subarray64(int ndims,

if (ndims == 0) DEBUG_RETURN_ERROR(NC_EDIMMETA)

#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count *sizes, *subsizes, *starts;

sizes = (MPI_Count*) NCI_Malloc((size_t)ndims * 3 * sizeof(MPI_Count));
Expand Down Expand Up @@ -329,7 +329,7 @@ filetype_create_vara(const NC *ncp,

/* previously, request size has been checked and it must > 0 */
if (IS_RECVAR(varp)) {
#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count blocklength;
#else
int blocklength;
Expand Down Expand Up @@ -358,7 +358,7 @@ filetype_create_vara(const NC *ncp,
blocklength = varp->xsz;
}

#if MPI_VERSION >= 3
#ifdef HAVE_MPI_LARGE_COUNT
/* concatenate number of count[0] subarray types into filetype */
err = MPI_Type_create_hvector_c(count[0], blocklength, ncp->recsize,
rectype, &filetype);
Expand Down Expand Up @@ -576,17 +576,19 @@ ncmpio_file_set_view(const NC *ncp,
MPI_Comm_rank(ncp->comm, &rank);
if (rank == 0) {
/* prepend the whole file header to filetype */
MPI_Datatype root_filetype, ftypes[2];
#if MPI_VERSION >= 3
MPI_Datatype root_filetype=MPI_BYTE, ftypes[2];
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count blocklens[2];
MPI_Count disps[2];
#else
int blocklens[2];
MPI_Aint disps[2];

/* check if header size > 2^31 */
if (ncp->begin_var > NC_MAX_INT)
DEBUG_ASSIGN_ERROR(status, NC_EINTOVERFLOW)
if (ncp->begin_var > NC_MAX_INT) {
status = NC_EINTOVERFLOW;
goto err_out;
}
#endif

/* first block is the header extent */
Expand All @@ -599,23 +601,31 @@ ncmpio_file_set_view(const NC *ncp,
disps[1] = *offset;
ftypes[1] = filetype;

#if (MPI_VERSION < 3) && (SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET)
#if !defined(HAVE_MPI_LARGE_COUNT) && (SIZEOF_MPI_AINT != SIZEOF_MPI_OFFSET)
if (*offset > NC_MAX_INT) {
blocklens[1] = 0;
DEBUG_ASSIGN_ERROR(status, NC_EAINT_TOO_SMALL)
status = NC_EINTOVERFLOW;
goto err_out;
}
#endif

#if MPI_VERSION >= 3
MPI_Type_create_struct_c(2, blocklens, disps, ftypes, &root_filetype);
#ifdef HAVE_MPI_LARGE_COUNT
mpireturn = MPI_Type_create_struct_c(2, blocklens, disps, ftypes,
&root_filetype);
#else
MPI_Type_create_struct(2, blocklens, disps, ftypes, &root_filetype);
mpireturn = MPI_Type_create_struct(2, blocklens, disps, ftypes,
&root_filetype);
#endif
if (mpireturn != MPI_SUCCESS) {
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_create_struct");
if (status == NC_NOERR) status = err;
}
MPI_Type_commit(&root_filetype);

err_out:
TRACE_IO(MPI_File_set_view)(fh, 0, MPI_BYTE, root_filetype, "native",
MPI_INFO_NULL);
MPI_Type_free(&root_filetype);
if (root_filetype != MPI_BYTE)
MPI_Type_free(&root_filetype);

/* now update the explicit offset to be used in MPI-IO call later */
*offset = ncp->begin_var;
Expand Down
Loading

0 comments on commit 2017454

Please sign in to comment.