Skip to content

Commit

Permalink
Revise file close assertion failure fix (HDFGroup#3418)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhendersonHDF committed Aug 31, 2023
1 parent 0f8c167 commit 724ea87
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 42 deletions.
2 changes: 1 addition & 1 deletion src/H5C.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ H5C_prep_for_file_close(H5F_t *f)

#ifdef H5_HAVE_PARALLEL
if ((H5F_INTENT(f) & H5F_ACC_RDWR) && (!image_generated) && (cache_ptr->aux_ptr != NULL) &&
(f->shared->sblock) && (f->shared->fs_persist)) {
(f->shared->fs_persist)) {
/* If persistent free space managers are enabled, flushing the
* metadata cache may result in the deletion, insertion, and/or
* dirtying of entries.
Expand Down
5 changes: 5 additions & 0 deletions src/H5Cimage.c
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,11 @@ H5C__load_cache_image(H5F_t *f)
} /* end if */

done:
if (ret_value < 0) {
if (H5_addr_defined(cache_ptr->image_addr))
cache_ptr->image_buffer = H5MM_xfree(cache_ptr->image_buffer);
}

FUNC_LEAVE_NOAPI(ret_value)
} /* H5C__load_cache_image() */

Expand Down
10 changes: 5 additions & 5 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ static herr_t H5F__build_name(const char *prefix, const char *file_name, char **
static char *H5F__getenv_prefix_name(char **env_prefix /*in,out*/);
static H5F_t *H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf);
static herr_t H5F__check_if_using_file_locks(H5P_genplist_t *fapl, hbool_t *use_file_locking);
static herr_t H5F__dest(H5F_t *f, hbool_t flush);
static herr_t H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure);
static herr_t H5F__build_actual_name(const H5F_t *f, const H5P_genplist_t *fapl, const char *name,
char ** /*out*/ actual_name);
static herr_t H5F__flush_phase1(H5F_t *f);
Expand Down Expand Up @@ -1381,7 +1381,7 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
*-------------------------------------------------------------------------
*/
static herr_t
H5F__dest(H5F_t *f, hbool_t flush)
H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure)
{
herr_t ret_value = SUCCEED; /* Return value */

Expand Down Expand Up @@ -1648,7 +1648,7 @@ H5F__dest(H5F_t *f, hbool_t flush)
HDONE_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "problems closing file")
f->shared = NULL;

if (ret_value >= 0)
if ((ret_value >= 0) || free_on_failure)
f = H5FL_FREE(H5F_t, f);

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2145,7 +2145,7 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)

done:
if ((NULL == ret_value) && file)
if (H5F__dest(file, FALSE) < 0)
if (H5F__dest(file, FALSE, TRUE) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "problems closing file")

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2555,7 +2555,7 @@ H5F_try_close(H5F_t *f, hbool_t *was_closed /*out*/)
* shared H5F_shared_t struct. If the reference count for the H5F_shared_t
* struct reaches zero then destroy it also.
*/
if (H5F__dest(f, TRUE) < 0)
if (H5F__dest(f, TRUE, FALSE) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "problems closing file")

/* Since we closed the file, this should be set to TRUE */
Expand Down
20 changes: 17 additions & 3 deletions src/H5Fsuper.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,8 +1087,9 @@ H5F__super_init(H5F_t *f)
FALSE; /* Whether the driver info block has been inserted into the metadata cache */
H5P_genplist_t *plist; /* File creation property list */
H5AC_ring_t orig_ring = H5AC_RING_INV;
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size; /* Size of superblock, in bytes */
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size = 0; /* Size of superblock, in bytes */
haddr_t superblock_addr = HADDR_UNDEF;
size_t driver_size; /* Size of driver info block (bytes) */
unsigned super_vers = HDF5_SUPERBLOCK_VERSION_DEF; /* Superblock version for file */
H5O_loc_t ext_loc; /* Superblock extension object location */
Expand Down Expand Up @@ -1288,7 +1289,7 @@ H5F__super_init(H5F_t *f)
f->shared->sblock = sblock;

/* Allocate space for the superblock */
if (HADDR_UNDEF == H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size))
if (HADDR_UNDEF == (superblock_addr = H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size)))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for superblock")

/* set the drvinfo filed to NULL -- will overwrite this later if needed */
Expand Down Expand Up @@ -1479,6 +1480,19 @@ H5F__super_init(H5F_t *f)

/* Check if the superblock has been allocated yet */
if (sblock) {
if (non_default_fs_settings && H5_addr_defined(superblock_addr)) {
/*
* For non-default free-space settings, the allocation of
* space in the file for the superblock may have have allocated
* memory for the free-space manager and inserted it into the
* metadata cache. Clean that up before returning or we may fail
* to close the file later due to the metadata cache's metadata
* free space manager ring (H5AC_RING_MDFSM) not being clean.
*/
if (H5MF_try_close(f) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't close file free space manager");
}

/* Check if we've cached it already */
if (sblock_in_cache) {
/* Unpin superblock in cache */
Expand Down
68 changes: 35 additions & 33 deletions src/H5MF.c
Original file line number Diff line number Diff line change
Expand Up @@ -2654,16 +2654,14 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
hbool_t fsm_opened[H5F_MEM_PAGE_NTYPES]; /* State of FSM */
hbool_t fsm_visited[H5F_MEM_PAGE_NTYPES]; /* State of FSM */

/* Sanity check */
HDassert(f->shared->sblock);

/* should only be called if file is opened R/W */
HDassert(H5F_INTENT(f) & H5F_ACC_RDWR);

/* shouldn't be called unless we have a superblock supporting the
* superblock extension.
*/
HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);
if (f->shared->sblock)
HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);

/* Initialize fsm_opened and fsm_visited */
HDmemset(fsm_opened, 0, sizeof(fsm_opened));
Expand Down Expand Up @@ -2811,40 +2809,44 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
* file space manager info message is guaranteed to exist.
* Leave it in for now, but consider removing it.
*/
if (H5F_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension")
if (f->shared->sblock) {
if (H5F_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension")
}

/* As the final element in 1), shrink the EOA for the file */
if (H5MF__close_shrink_eoa(f) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTSHRINK, FAIL, "can't shrink eoa")

/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension")
if (f->shared->sblock) {
/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension")
}

/* 3) Scan all free space managers not involved in allocating
* space for free space managers. For each such free space
Expand Down

0 comments on commit 724ea87

Please sign in to comment.