Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.12 Merge] Fix assertion failure during file close on error #3461

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,17 @@ Bug Fixes since HDF5-1.12.2 release
===================================
Library
-------
- Fixed an assertion failure in Parallel HDF5 when a file can't be created
due to an invalid library version bounds setting

An assertion failure could occur in H5MF_settle_raw_data_fsm when a file
can't be created with Parallel HDF5 due to specifying the use of a paged,
persistent file free space manager
(H5Pset_file_space_strategy(..., H5F_FSPACE_STRATEGY_PAGE, 1, ...)) with
an invalid library version bounds combination
(H5Pset_libver_bounds(..., H5F_LIBVER_EARLIEST, H5F_LIBVER_V18)). This
has now been fixed.

- Fixed a bug in H5Ocopy that could generate invalid HDF5 files

H5Ocopy was missing a check to determine whether the new object's
Expand Down
5 changes: 5 additions & 0 deletions src/H5Cimage.c
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,11 @@ H5C__load_cache_image(H5F_t *f)
} /* end if */

done:
if (ret_value < 0) {
if (H5F_addr_defined(cache_ptr->image_addr))
cache_ptr->image_buffer = H5MM_xfree(cache_ptr->image_buffer);
}

FUNC_LEAVE_NOAPI(ret_value)
} /* H5C__load_cache_image() */

Expand Down
10 changes: 5 additions & 5 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ static herr_t H5F__build_name(const char *prefix, const char *file_name, char **
static char *H5F__getenv_prefix_name(char **env_prefix /*in,out*/);
static H5F_t *H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf);
static herr_t H5F__check_if_using_file_locks(H5P_genplist_t *fapl, hbool_t *use_file_locking);
static herr_t H5F__dest(H5F_t *f, hbool_t flush);
static herr_t H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure);
static herr_t H5F__build_actual_name(const H5F_t *f, const H5P_genplist_t *fapl, const char *name,
char ** /*out*/ actual_name);
static herr_t H5F__flush_phase1(H5F_t *f);
Expand Down Expand Up @@ -1381,7 +1381,7 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
*-------------------------------------------------------------------------
*/
static herr_t
H5F__dest(H5F_t *f, hbool_t flush)
H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure)
{
herr_t ret_value = SUCCEED; /* Return value */

Expand Down Expand Up @@ -1648,7 +1648,7 @@ H5F__dest(H5F_t *f, hbool_t flush)
HDONE_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "problems closing file")
f->shared = NULL;

if (ret_value >= 0)
if ((ret_value >= 0) || free_on_failure)
f = H5FL_FREE(H5F_t, f);

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2145,7 +2145,7 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)

done:
if ((NULL == ret_value) && file)
if (H5F__dest(file, FALSE) < 0)
if (H5F__dest(file, FALSE, TRUE) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "problems closing file")

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2555,7 +2555,7 @@ H5F_try_close(H5F_t *f, hbool_t *was_closed /*out*/)
* shared H5F_shared_t struct. If the reference count for the H5F_shared_t
* struct reaches zero then destroy it also.
*/
if (H5F__dest(f, TRUE) < 0)
if (H5F__dest(f, TRUE, FALSE) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "problems closing file")

/* Since we closed the file, this should be set to TRUE */
Expand Down
20 changes: 17 additions & 3 deletions src/H5Fsuper.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,8 +1087,9 @@ H5F__super_init(H5F_t *f)
FALSE; /* Whether the driver info block has been inserted into the metadata cache */
H5P_genplist_t *plist; /* File creation property list */
H5AC_ring_t orig_ring = H5AC_RING_INV;
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size; /* Size of superblock, in bytes */
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size = 0; /* Size of superblock, in bytes */
haddr_t superblock_addr = HADDR_UNDEF;
size_t driver_size; /* Size of driver info block (bytes) */
unsigned super_vers = HDF5_SUPERBLOCK_VERSION_DEF; /* Superblock version for file */
H5O_loc_t ext_loc; /* Superblock extension object location */
Expand Down Expand Up @@ -1288,7 +1289,7 @@ H5F__super_init(H5F_t *f)
f->shared->sblock = sblock;

/* Allocate space for the superblock */
if (HADDR_UNDEF == H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size))
if (HADDR_UNDEF == (superblock_addr = H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size)))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for superblock")

/* set the drvinfo filed to NULL -- will overwrite this later if needed */
Expand Down Expand Up @@ -1479,6 +1480,19 @@ H5F__super_init(H5F_t *f)

/* Check if the superblock has been allocated yet */
if (sblock) {
if (non_default_fs_settings && H5F_addr_defined(superblock_addr)) {
/*
* For non-default free-space settings, the allocation of
* space in the file for the superblock may have have allocated
* memory for the free-space manager and inserted it into the
* metadata cache. Clean that up before returning or we may fail
* to close the file later due to the metadata cache's metadata
* free space manager ring (H5AC_RING_MDFSM) not being clean.
*/
if (H5MF_try_close(f) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't close file free space manager");
}

/* Check if we've cached it already */
if (sblock_in_cache) {
/* Unpin superblock in cache */
Expand Down
68 changes: 35 additions & 33 deletions src/H5MF.c
Original file line number Diff line number Diff line change
Expand Up @@ -2654,16 +2654,14 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
hbool_t fsm_opened[H5F_MEM_PAGE_NTYPES]; /* State of FSM */
hbool_t fsm_visited[H5F_MEM_PAGE_NTYPES]; /* State of FSM */

/* Sanity check */
HDassert(f->shared->sblock);

/* should only be called if file is opened R/W */
HDassert(H5F_INTENT(f) & H5F_ACC_RDWR);

/* shouldn't be called unless we have a superblock supporting the
* superblock extension.
*/
HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);
if (f->shared->sblock)
HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);

/* Initialize fsm_opened and fsm_visited */
HDmemset(fsm_opened, 0, sizeof(fsm_opened));
Expand Down Expand Up @@ -2811,40 +2809,44 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
* file space manager info message is guaranteed to exist.
* Leave it in for now, but consider removing it.
*/
if (H5F_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension")
if (f->shared->sblock) {
if (H5F_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension")
}

/* As the final element in 1), shrink the EOA for the file */
if (H5MF__close_shrink_eoa(f) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTSHRINK, FAIL, "can't shrink eoa")

/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension")
if (f->shared->sblock) {
/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension")
}

/* 3) Scan all free space managers not involved in allocating
* space for free space managers. For each such free space
Expand Down
51 changes: 51 additions & 0 deletions testpar/t_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -947,3 +947,54 @@ test_file_properties(void)
VRFY((mpi_ret >= 0), "MPI_Info_free succeeded");

} /* end test_file_properties() */

/*
* Tests for an assertion failure during file close that used
* to occur when the library fails to create a file in parallel
* due to an invalid library version bounds setting
*/
void
test_invalid_libver_bounds_file_close_assert(void)
{
const char *filename = NULL;
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Info info = MPI_INFO_NULL;
herr_t ret;
hid_t fid = H5I_INVALID_HID;
hid_t fapl_id = H5I_INVALID_HID;
hid_t fcpl_id = H5I_INVALID_HID;

filename = (const char *)GetTestParameters();

/* set up MPI parameters */
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

/* setup file access plist */
fapl_id = H5Pcreate(H5P_FILE_ACCESS);
VRFY((fapl_id != H5I_INVALID_HID), "H5Pcreate");
ret = H5Pset_fapl_mpio(fapl_id, comm, info);
VRFY((SUCCEED == ret), "H5Pset_fapl_mpio");
ret = H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_V18);
VRFY((SUCCEED == ret), "H5Pset_libver_bounds");

/* setup file creation plist */
fcpl_id = H5Pcreate(H5P_FILE_CREATE);
VRFY((fcpl_id != H5I_INVALID_HID), "H5Pcreate");

ret = H5Pset_file_space_strategy(fcpl_id, H5F_FSPACE_STRATEGY_PAGE, TRUE, 1);
VRFY((SUCCEED == ret), "H5Pset_file_space_strategy");

/* create the file */
H5E_BEGIN_TRY
{
fid = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl_id, fapl_id);
}
H5E_END_TRY
VRFY((fid == H5I_INVALID_HID), "H5Fcreate");

ret = H5Pclose(fapl_id);
VRFY((SUCCEED == ret), "H5Pclose");
ret = H5Pclose(fcpl_id);
VRFY((SUCCEED == ret), "H5Pclose");
}
3 changes: 3 additions & 0 deletions testpar/testphdf5.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@ main(int argc, char **argv)

AddTest("props", test_file_properties, NULL, "Coll Metadata file property settings", PARATESTFILE);

AddTest("invlibverassert", test_invalid_libver_bounds_file_close_assert, NULL,
"Invalid libver bounds assertion failure", PARATESTFILE);

AddTest("idsetw", dataset_writeInd, NULL, "dataset independent write", PARATESTFILE);
AddTest("idsetr", dataset_readInd, NULL, "dataset independent read", PARATESTFILE);

Expand Down
1 change: 1 addition & 0 deletions testpar/testphdf5.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ void test_plist_ed(void);
void external_links(void);
void zero_dim_dset(void);
void test_file_properties(void);
void test_invalid_libver_bounds_file_close_assert(void);
void multiple_dset_write(void);
void multiple_group_write(void);
void multiple_group_read(void);
Expand Down