From db772ce34c833489267513d66c45908d5cdea3c2 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Thu, 3 Aug 2023 15:47:28 -0600 Subject: [PATCH] Explicitly suppress variable length type compression re: PR https://github.com/Unidata/netcdf-c/pull/2716). re: Issue https://github.com/Unidata/netcdf-c/issues/2189 The basic change is to make use of the fact that HDF5 automatically suppresses optional filters when an attempt is made to apply them to variable-length typed arrays. This means that e.g. ncdump or nccopy will properly see meaningful data. Note that if a filter is defined as HDF5 mandatory, then the corresponding variable will be suppressed and will be invisible to ncdump and nccopy. This functionality is also propagated to NCZarr. This PR makes some minor changes to PR https://github.com/Unidata/netcdf-c/pull/2716 as follows: * Move the test for filter X variable-length from dfilter.c down into the dispatch table functions. * Make all filters for HDF5 optional rather than mandatory so that the built-in HDF5 test for filter X variable-length will be invoked. The test case for this was expanded to verify that the filters are defined, but suppressed. --- libdispatch/dfilter.c | 10 --- libhdf5/nc4hdf.c | 6 +- libnczarr/zfilter.c | 21 +++-- nc_test/test_byterange.sh | 2 + nc_test4/Makefile.am | 2 +- nc_test4/tst_filter_vlen.c | 152 +++++++++++++++++++++++++++++++----- nc_test4/tst_filter_vlen.sh | 5 +- 7 files changed, 155 insertions(+), 43 deletions(-) diff --git a/libdispatch/dfilter.c b/libdispatch/dfilter.c index 7c2cfe8c07..b0d0a1582a 100644 --- a/libdispatch/dfilter.c +++ b/libdispatch/dfilter.c @@ -126,19 +126,9 @@ nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const un { int stat = NC_NOERR; NC* ncp; - int fixedsize; - nc_type xtype; TRACE(nc_inq_var_filter); if((stat = NC_check_id(ncid,&ncp))) return stat; - /* Get variable' type */ - if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat; - /* If the variable's type is not fixed-size, then signal error */ - if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat; - if(!fixedsize) { - nclog(NCLOGWARN,"Filters cannot be applied to variable length data types."); - return NC_NOERR; /* Deliberately suppress */ - } if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done; done: return stat; diff --git a/libhdf5/nc4hdf.c b/libhdf5/nc4hdf.c index fb3770afcc..a759929c71 100644 --- a/libhdf5/nc4hdf.c +++ b/libhdf5/nc4hdf.c @@ -914,11 +914,7 @@ var_create_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, nc_bool_t write_dimid BAIL(NC_EFILTER); } else { herr_t code = H5Pset_filter(plistid, fi->filterid, -#if 1 - H5Z_FLAG_MANDATORY, -#else - H5Z_FLAG_OPTIONAL, -#endif + H5Z_FLAG_OPTIONAL, /* always make optional so filters on vlens are ignored */ fi->nparams, fi->params); if(code < 0) BAIL(NC_EFILTER); diff --git a/libnczarr/zfilter.c b/libnczarr/zfilter.c index 03047d95f7..9d2bc68f23 100644 --- a/libnczarr/zfilter.c +++ b/libnczarr/zfilter.c @@ -130,12 +130,13 @@ ncz_hdf5_clear(NCZ_HDF5* h) { typedef struct NCZ_Filter { int flags; /**< Flags describing state of this filter. */ -# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */ -# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */ -# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */ -# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */ -# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */ -# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */ +# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */ +# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */ +# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */ +# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */ +# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */ +# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */ +# define FLAG_SUPPRESS 64 /* If set, => filter should not be used (probably because variable is not fixed size */ NCZ_HDF5 hdf5; NCZ_Codec codec; struct NCZ_Plugin* plugin; /**< Implementation of this filter. */ @@ -389,6 +390,12 @@ NCZ_addfilter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, unsigned int id, size_t nclistpush((NClist*)var->filters, fi); } + /* If this variable is not fixed size, mark filter as suppressed */ + if(var->type_info->varsized) { + fi->flags |= FLAG_SUPPRESS; + nclog(NCLOGWARN,"Filters cannot be applied to variable length data types; ignored"); + } + if(!FILTERINCOMPLETE(fi)) { /* (over)write the HDF5 parameters */ nullfree(fi->hdf5.visible.params); @@ -870,6 +877,7 @@ fprintf(stderr,">>> current: alloc=%u used=%u buf=%p\n",(unsigned)current_alloc, if(encode) { for(i=0;iflags & FLAG_SUPPRESS) continue; /* this filter should not be applied */ ff = f->plugin->hdf5.filter; /* code can be simplified */ next_alloc = current_alloc; @@ -889,6 +897,7 @@ fprintf(stderr,">>> next: alloc=%u used=%u buf=%p\n",(unsigned)next_alloc,(unsig /* Apply in reverse order */ for(i=nclistlength(chain)-1;i>=0;i--) { f = (struct NCZ_Filter*)nclistget(chain,i); + if(f->flags & FLAG_SUPPRESS) continue; /* this filter should not be applied */ ff = f->plugin->hdf5.filter; /* code can be simplified */ next_alloc = current_alloc; diff --git a/nc_test/test_byterange.sh b/nc_test/test_byterange.sh index 38923bfa03..fe9a2cb487 100755 --- a/nc_test/test_byterange.sh +++ b/nc_test/test_byterange.sh @@ -86,7 +86,9 @@ ${NCDUMP} -n nc_enddef "$U" >tmp_${TAG}.cdl diff -wb tmp_$TAG.cdl ${srcdir}/nc_enddef.cdl } +if test "x$FEATURE_S3TESTS" = xyes ; then testsetup https://s3.us-east-1.amazonaws.com/unidata-zarr-test-data +fi echo "*** Testing reading NetCDF-3 file with http" diff --git a/nc_test4/Makefile.am b/nc_test4/Makefile.am index 67de44712a..f3633218e0 100644 --- a/nc_test4/Makefile.am +++ b/nc_test4/Makefile.am @@ -81,7 +81,7 @@ endif if USE_HDF5 if ENABLE_FILTER_TESTING extradir = -check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat test_filter_vlen +check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat check_PROGRAMS += tst_multifilter tst_filter_vlen TESTS += tst_filter.sh TESTS += tst_specific_filters.sh diff --git a/nc_test4/tst_filter_vlen.c b/nc_test4/tst_filter_vlen.c index 0248623963..827087bb51 100644 --- a/nc_test4/tst_filter_vlen.c +++ b/nc_test4/tst_filter_vlen.c @@ -17,11 +17,7 @@ #undef DEBUG -#ifndef H5Z_FILTER_BZIP2 -#define H5Z_FILTER_BZIP2 307 -#endif - -#define TEST_ID 32768 +#define FILTER_ID 1 /*deflate*/ #define MAXERRS 8 @@ -51,8 +47,7 @@ static int nerrs = 0; static int ncid, varid; static int dimids[MAXDIMS]; -static float* array = NULL; -static float* expected = NULL; +static char** array = NULL; /* Forward */ static int test_test1(void); @@ -100,26 +95,142 @@ defvar(nc_type xtype) return NC_NOERR; } +static int +reopen(void) +{ + size_t i; + + CHECK(nc_open(testfile, NC_NETCDF4, &ncid)); + for(i=0;i 0) { - fprintf(stderr,"*** id=%d\n",id); + memset(filterids,0,sizeof(filterids)); + params[0] = 5; + CHECK(nc_inq_var_filter_ids(ncid,varid,&nfilters,filterids)); + fprintf(stderr,"test_test1: nc_var_filter_ids: nfilters=%u filterids[0]=%d\n",(unsigned)nfilters,filterids[0]); + if(nfilters != 1 && filterids[0] != FILTER_ID) { + fprintf(stderr,"test_test1: nc_var_filter_ids: failed\n"); ok = 0; } - CHECK(nc_abort(ncid)); + params[0] = 0; + CHECK(nc_inq_var_filter_info(ncid, varid, filterids[0], &nparams, params)); + fprintf(stderr,"test_test1: nc_inq_var_filter_info: nparams=%u params[0]=%u\n",(unsigned)nparams,(unsigned)params[0]); + return ok; +} + +/* Test that a filter on a variable length var is suppressed */ +static int +test_test2(void) +{ + int stat = NC_NOERR; + int ok = 1; + size_t i; + + reset(); + fprintf(stderr,"test4: write with filter on a variable length type.\n"); + /* generate the data to write */ + for(i=0;i 0 ? "FAILED" : "PASS")); exit(nerrs > 0?1:0); } diff --git a/nc_test4/tst_filter_vlen.sh b/nc_test4/tst_filter_vlen.sh index 03beb080a5..a642acf37f 100755 --- a/nc_test4/tst_filter_vlen.sh +++ b/nc_test4/tst_filter_vlen.sh @@ -1,8 +1,6 @@ #!/bin/bash -# Test the filter install -# This cannot be run as a regular test -# because installation will not have occurred +# Test filters on non-fixed size variables. if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh @@ -95,6 +93,7 @@ if test "x$TESTNCZARR" = x1 ; then testset file if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testset zip ; fi if test "x$FEATURE_S3TESTS" = xyes ; then testset s3 ; fi + if test "x$FEATURE_S3TESTS" = xyes ; then s3sdkdelete "/${S3ISOPATH}" ; fi # Cleanup else testset nc fi