From d5b224da008233a506de4caab64d05927b188a09 Mon Sep 17 00:00:00 2001 From: jhendersonHDF Date: Fri, 25 Aug 2023 08:10:23 -0500 Subject: [PATCH] Revise file close assertion failure fix (#3418) --- src/H5C.c | 2 +- src/H5Cimage.c | 5 ++++ src/H5Fint.c | 8 +++--- src/H5Fpkg.h | 2 +- src/H5Fsuper.c | 20 ++++++++++++--- src/H5MF.c | 68 ++++++++++++++++++++++++++------------------------ 6 files changed, 63 insertions(+), 42 deletions(-) diff --git a/src/H5C.c b/src/H5C.c index d99cf659db8..e8aa964054b 100644 --- a/src/H5C.c +++ b/src/H5C.c @@ -738,7 +738,7 @@ H5C_prep_for_file_close(H5F_t *f) #ifdef H5_HAVE_PARALLEL if ((H5F_INTENT(f) & H5F_ACC_RDWR) && (!image_generated) && (cache_ptr->aux_ptr != NULL) && - (f->shared->sblock) && (f->shared->fs_persist)) { + (f->shared->fs_persist)) { /* If persistent free space managers are enabled, flushing the * metadata cache may result in the deletion, insertion, and/or * dirtying of entries. diff --git a/src/H5Cimage.c b/src/H5Cimage.c index 7a3ed2510bc..20cba7f34cc 100644 --- a/src/H5Cimage.c +++ b/src/H5Cimage.c @@ -1160,6 +1160,11 @@ H5C__load_cache_image(H5F_t *f) } /* end if */ done: + if (ret_value < 0) { + if (H5_addr_defined(cache_ptr->image_addr)) + cache_ptr->image_buffer = H5MM_xfree(cache_ptr->image_buffer); + } + FUNC_LEAVE_NOAPI(ret_value) } /* H5C__load_cache_image() */ diff --git a/src/H5Fint.c b/src/H5Fint.c index 405de4ad7b1..008e9030e4c 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -1231,7 +1231,7 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F *------------------------------------------------------------------------- */ herr_t -H5F__dest(H5F_t *f, hbool_t flush) +H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure) { herr_t ret_value = SUCCEED; /* Return value */ @@ -1467,7 +1467,7 @@ H5F__dest(H5F_t *f, hbool_t flush) HDONE_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "problems closing file") f->shared = NULL; - if (ret_value >= 0) + if ((ret_value >= 0) || free_on_failure) f = H5FL_FREE(H5F_t, f); FUNC_LEAVE_NOAPI(ret_value) @@ -1972,7 +1972,7 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id) done: if ((NULL == ret_value) && file) - if (H5F__dest(file, FALSE) < 0) + if (H5F__dest(file, FALSE, TRUE) < 0) HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "problems closing file") FUNC_LEAVE_NOAPI(ret_value) @@ -2408,7 +2408,7 @@ H5F_try_close(H5F_t *f, hbool_t *was_closed /*out*/) * shared H5F_shared_t struct. If the reference count for the H5F_shared_t * struct reaches zero then destroy it also. */ - if (H5F__dest(f, TRUE) < 0) + if (H5F__dest(f, TRUE, FALSE) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "problems closing file") /* Since we closed the file, this should be set to TRUE */ diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index 643219d733b..b5df1c3d739 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -399,7 +399,7 @@ H5_DLLVAR htri_t use_locks_env_g; /* General routines */ H5_DLL H5F_t *H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf); -H5_DLL herr_t H5F__dest(H5F_t *f, hbool_t flush); +H5_DLL herr_t H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure); H5_DLL herr_t H5F__flush(H5F_t *f); H5_DLL htri_t H5F__is_hdf5(const char *name); H5_DLL ssize_t H5F__get_file_image(H5F_t *f, void *buf_ptr, size_t buf_len); diff --git a/src/H5Fsuper.c b/src/H5Fsuper.c index 21535929e31..2ad2ce73840 100644 --- a/src/H5Fsuper.c +++ b/src/H5Fsuper.c @@ -1085,8 +1085,9 @@ H5F__super_init(H5F_t *f) FALSE; /* Whether the driver info block has been inserted into the metadata cache */ H5P_genplist_t *plist; /* File creation property list */ H5AC_ring_t orig_ring = H5AC_RING_INV; - hsize_t userblock_size; /* Size of userblock, in bytes */ - hsize_t superblock_size; /* Size of superblock, in bytes */ + hsize_t userblock_size; /* Size of userblock, in bytes */ + hsize_t superblock_size = 0; /* Size of superblock, in bytes */ + haddr_t superblock_addr = HADDR_UNDEF; size_t driver_size; /* Size of driver info block (bytes) */ unsigned super_vers = HDF5_SUPERBLOCK_VERSION_DEF; /* Superblock version for file */ H5O_loc_t ext_loc; /* Superblock extension object location */ @@ -1286,7 +1287,7 @@ H5F__super_init(H5F_t *f) f->shared->sblock = sblock; /* Allocate space for the superblock */ - if (HADDR_UNDEF == H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size)) + if (HADDR_UNDEF == (superblock_addr = H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for superblock") /* set the drvinfo filed to NULL -- will overwrite this later if needed */ @@ -1477,6 +1478,19 @@ H5F__super_init(H5F_t *f) /* Check if the superblock has been allocated yet */ if (sblock) { + if (non_default_fs_settings && H5_addr_defined(superblock_addr)) { + /* + * For non-default free-space settings, the allocation of + * space in the file for the superblock may have have allocated + * memory for the free-space manager and inserted it into the + * metadata cache. Clean that up before returning or we may fail + * to close the file later due to the metadata cache's metadata + * free space manager ring (H5AC_RING_MDFSM) not being clean. + */ + if (H5MF_try_close(f) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't close file free space manager"); + } + /* Check if we've cached it already */ if (sblock_in_cache) { /* Unpin superblock in cache */ diff --git a/src/H5MF.c b/src/H5MF.c index 6e842990727..510a15be8c3 100644 --- a/src/H5MF.c +++ b/src/H5MF.c @@ -2681,16 +2681,14 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled) hbool_t fsm_opened[H5F_MEM_PAGE_NTYPES]; /* State of FSM */ hbool_t fsm_visited[H5F_MEM_PAGE_NTYPES]; /* State of FSM */ - /* Sanity check */ - HDassert(f->shared->sblock); - /* should only be called if file is opened R/W */ HDassert(H5F_INTENT(f) & H5F_ACC_RDWR); /* shouldn't be called unless we have a superblock supporting the * superblock extension. */ - HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2); + if (f->shared->sblock) + HDassert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2); /* Initialize fsm_opened and fsm_visited */ HDmemset(fsm_opened, 0, sizeof(fsm_opened)); @@ -2838,40 +2836,44 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled) * file space manager info message is guaranteed to exist. * Leave it in for now, but consider removing it. */ - if (H5F_addr_defined(f->shared->sblock->ext_addr)) - if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL, - "error in removing message from superblock extension") + if (f->shared->sblock) { + if (H5F_addr_defined(f->shared->sblock->ext_addr)) + if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL, + "error in removing message from superblock extension") + } /* As the final element in 1), shrink the EOA for the file */ if (H5MF__close_shrink_eoa(f) < 0) HGOTO_ERROR(H5E_RESOURCE, H5E_CANTSHRINK, FAIL, "can't shrink eoa") - /* 2) Ensure that space is allocated for the free space manager superblock - * extension message. Must do this now, before reallocating file space - * for free space managers, as it is possible that this allocation may - * grab the last section in a FSM -- making it unnecessary to - * re-allocate file space for it. - * - * Do this by writing a free space manager superblock extension message. - * - * Since no free space manager has file space allocated for it, this - * message must be invalid since we can't save addresses of FSMs when - * those addresses are unknown. This is OK -- we will write the correct - * values to the message at free space manager shutdown. - */ - for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++) - fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF; - fsinfo.strategy = f->shared->fs_strategy; - fsinfo.persist = f->shared->fs_persist; - fsinfo.threshold = f->shared->fs_threshold; - fsinfo.page_size = f->shared->fs_page_size; - fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres; - fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF; - - if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0) - HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL, - "error in writing fsinfo message to superblock extension") + if (f->shared->sblock) { + /* 2) Ensure that space is allocated for the free space manager superblock + * extension message. Must do this now, before reallocating file space + * for free space managers, as it is possible that this allocation may + * grab the last section in a FSM -- making it unnecessary to + * re-allocate file space for it. + * + * Do this by writing a free space manager superblock extension message. + * + * Since no free space manager has file space allocated for it, this + * message must be invalid since we can't save addresses of FSMs when + * those addresses are unknown. This is OK -- we will write the correct + * values to the message at free space manager shutdown. + */ + for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++) + fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF; + fsinfo.strategy = f->shared->fs_strategy; + fsinfo.persist = f->shared->fs_persist; + fsinfo.threshold = f->shared->fs_threshold; + fsinfo.page_size = f->shared->fs_page_size; + fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres; + fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF; + + if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL, + "error in writing fsinfo message to superblock extension") + } /* 3) Scan all free space managers not involved in allocating * space for free space managers. For each such free space