Skip to content

Commit

Permalink
Explicitly suppress variable length type compression
Browse files Browse the repository at this point in the history
re: PR #2716).
re: Issue #2189

The basic change is to make use of the fact that HDF5 automatically suppresses optional filters when an attempt is made to apply them to variable-length typed arrays.
This means that e.g. ncdump or nccopy will properly see meaningful data.
Note that if a filter is defined as HDF5 mandatory, then the corresponding variable will be suppressed and will be invisible to ncdump and nccopy.
This functionality is also propagated to NCZarr.

This PR makes some minor changes to PR #2716 as follows:
* Move the test for filter X variable-length from dfilter.c down into the dispatch table functions.
* Make all filters for HDF5 optional rather than mandatory so that the built-in HDF5 test for filter X variable-length will be invoked.

The test case for this was expanded to verify that the filters are defined, but suppressed.
  • Loading branch information
DennisHeimbigner committed Aug 3, 2023
1 parent 2cd9d26 commit db772ce
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 43 deletions.
10 changes: 0 additions & 10 deletions libdispatch/dfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,19 +126,9 @@ nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const un
{
int stat = NC_NOERR;
NC* ncp;
int fixedsize;
nc_type xtype;

TRACE(nc_inq_var_filter);
if((stat = NC_check_id(ncid,&ncp))) return stat;
/* Get variable' type */
if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat;
/* If the variable's type is not fixed-size, then signal error */
if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat;
if(!fixedsize) {
nclog(NCLOGWARN,"Filters cannot be applied to variable length data types.");
return NC_NOERR; /* Deliberately suppress */
}
if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done;
done:
return stat;
Expand Down
6 changes: 1 addition & 5 deletions libhdf5/nc4hdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,11 +914,7 @@ var_create_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, nc_bool_t write_dimid
BAIL(NC_EFILTER);
} else {
herr_t code = H5Pset_filter(plistid, fi->filterid,
#if 1
H5Z_FLAG_MANDATORY,
#else
H5Z_FLAG_OPTIONAL,
#endif
H5Z_FLAG_OPTIONAL, /* always make optional so filters on vlens are ignored */
fi->nparams, fi->params);
if(code < 0)
BAIL(NC_EFILTER);
Expand Down
21 changes: 15 additions & 6 deletions libnczarr/zfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,13 @@ ncz_hdf5_clear(NCZ_HDF5* h) {

typedef struct NCZ_Filter {
int flags; /**< Flags describing state of this filter. */
# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */
# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */
# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */
# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */
# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */
# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */
# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */
# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */
# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */
# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */
# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */
# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */
# define FLAG_SUPPRESS 64 /* If set, => filter should not be used (probably because variable is not fixed size */
NCZ_HDF5 hdf5;
NCZ_Codec codec;
struct NCZ_Plugin* plugin; /**< Implementation of this filter. */
Expand Down Expand Up @@ -389,6 +390,12 @@ NCZ_addfilter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, unsigned int id, size_t
nclistpush((NClist*)var->filters, fi);
}

/* If this variable is not fixed size, mark filter as suppressed */
if(var->type_info->varsized) {
fi->flags |= FLAG_SUPPRESS;
nclog(NCLOGWARN,"Filters cannot be applied to variable length data types; ignored");
}

if(!FILTERINCOMPLETE(fi)) {
/* (over)write the HDF5 parameters */
nullfree(fi->hdf5.visible.params);
Expand Down Expand Up @@ -870,6 +877,7 @@ fprintf(stderr,">>> current: alloc=%u used=%u buf=%p\n",(unsigned)current_alloc,
if(encode) {
for(i=0;i<nclistlength(chain);i++) {
f = (struct NCZ_Filter*)nclistget(chain,i);
if(f->flags & FLAG_SUPPRESS) continue; /* this filter should not be applied */
ff = f->plugin->hdf5.filter;
/* code can be simplified */
next_alloc = current_alloc;
Expand All @@ -889,6 +897,7 @@ fprintf(stderr,">>> next: alloc=%u used=%u buf=%p\n",(unsigned)next_alloc,(unsig
/* Apply in reverse order */
for(i=nclistlength(chain)-1;i>=0;i--) {
f = (struct NCZ_Filter*)nclistget(chain,i);
if(f->flags & FLAG_SUPPRESS) continue; /* this filter should not be applied */
ff = f->plugin->hdf5.filter;
/* code can be simplified */
next_alloc = current_alloc;
Expand Down
2 changes: 2 additions & 0 deletions nc_test/test_byterange.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ ${NCDUMP} -n nc_enddef "$U" >tmp_${TAG}.cdl
diff -wb tmp_$TAG.cdl ${srcdir}/nc_enddef.cdl
}

if test "x$FEATURE_S3TESTS" = xyes ; then
testsetup https://s3.us-east-1.amazonaws.com/unidata-zarr-test-data
fi

echo "*** Testing reading NetCDF-3 file with http"

Expand Down
2 changes: 1 addition & 1 deletion nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ endif
if USE_HDF5
if ENABLE_FILTER_TESTING
extradir =
check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat test_filter_vlen
check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat
check_PROGRAMS += tst_multifilter tst_filter_vlen
TESTS += tst_filter.sh
TESTS += tst_specific_filters.sh
Expand Down
152 changes: 134 additions & 18 deletions nc_test4/tst_filter_vlen.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@

#undef DEBUG

#ifndef H5Z_FILTER_BZIP2
#define H5Z_FILTER_BZIP2 307
#endif

#define TEST_ID 32768
#define FILTER_ID 1 /*deflate*/

#define MAXERRS 8

Expand Down Expand Up @@ -51,8 +47,7 @@ static int nerrs = 0;

static int ncid, varid;
static int dimids[MAXDIMS];
static float* array = NULL;
static float* expected = NULL;
static char** array = NULL;

/* Forward */
static int test_test1(void);
Expand Down Expand Up @@ -100,26 +95,142 @@ defvar(nc_type xtype)
return NC_NOERR;
}

static int
reopen(void)
{
size_t i;

CHECK(nc_open(testfile, NC_NETCDF4, &ncid));
for(i=0;i<ndims;i++) {
char dimname[1024];
snprintf(dimname,sizeof(dimname),"dim%u",(unsigned)i);
CHECK(nc_inq_dimid(ncid, dimname, &dimids[i]));
CHECK(nc_inq_dim(ncid, dimids[i], NULL, &dimsize[i]));
}
CHECK(nc_inq_varid(ncid, "var", &varid));
return NC_NOERR;
}



/* Test that a filter is a variable length var is defined */
static int
test_test1(void)
{
int ok = 1;
int id = -1;
size_t nparams;
size_t nfilters = 0;
unsigned filterids[64];
unsigned params[NPARAMS] = {5};
size_t nparams = 0;

reset();
fprintf(stderr,"test4: filter on a variable length type.\n");
create();
defvar(NC_STRING);
/* Do explicit filter; should never fail, but may produce log warning */
CHECK(nc_def_var_filter(ncid,varid,H5Z_FILTER_BZIP2,0,NULL));
CHECK(nc_def_var_filter(ncid,varid,FILTER_ID,1,params));
/* Now see if filter was defined or not */
CHECK(nc_inq_var_filter(ncid,varid,&id,&nparams,NULL));
if(id > 0) {
fprintf(stderr,"*** id=%d\n",id);
memset(filterids,0,sizeof(filterids));
params[0] = 5;
CHECK(nc_inq_var_filter_ids(ncid,varid,&nfilters,filterids));
fprintf(stderr,"test_test1: nc_var_filter_ids: nfilters=%u filterids[0]=%d\n",(unsigned)nfilters,filterids[0]);
if(nfilters != 1 && filterids[0] != FILTER_ID) {
fprintf(stderr,"test_test1: nc_var_filter_ids: failed\n");
ok = 0;
}
CHECK(nc_abort(ncid));
params[0] = 0;
CHECK(nc_inq_var_filter_info(ncid, varid, filterids[0], &nparams, params));
fprintf(stderr,"test_test1: nc_inq_var_filter_info: nparams=%u params[0]=%u\n",(unsigned)nparams,(unsigned)params[0]);
return ok;
}

/* Test that a filter on a variable length var is suppressed */
static int
test_test2(void)
{
int stat = NC_NOERR;
int ok = 1;
size_t i;

reset();
fprintf(stderr,"test4: write with filter on a variable length type.\n");
/* generate the data to write */
for(i=0;i<actualproduct;i++) {
char digits[64];
snprintf(digits,sizeof(digits),"%u",(unsigned)i);
array[i] = strdup(digits);
}
/* write the data */
if((stat=nc_put_var(ncid,varid,(void*)array))) {
fprintf(stderr,"test_test2: nc_put_var: error = (%d)%s\n",stat,nc_strerror(stat));
ok = 0;
goto done;
}
/* re-read the data */
reset();
if((stat=nc_get_var(ncid,varid,(void*)array))) {
fprintf(stderr,"test_test2: nc_get_var: error = (%d)%s\n",stat,nc_strerror(stat));
ok = 0;
goto done;
}
/* verify the data */
for(i=0;i<actualproduct;i++) {
unsigned value = 0xffffffff;
if(array[i] != NULL)
sscanf(array[i],"%u",&value);
if(array[i] == NULL || i != value) {
fprintf(stderr,"test_test2: nc_get_var: value mismatch at %u\n",(unsigned)i);
ok = 0;
goto done;
}
}
nc_close(ncid);
done:
return ok;
}

/* Test that a filter on a variable length var is suppressed */
static int
test_test3(void)
{
int stat = NC_NOERR;
int ok = 1;
size_t i,nfilters;
unsigned filterids[64];

fprintf(stderr,"test4: re-open variable with filter on a variable length type and verify state.\n");

reopen();

/* verify filter state */
memset(filterids,0,sizeof(filterids));
CHECK(nc_inq_var_filter_ids(ncid,varid,&nfilters,filterids));
fprintf(stderr,"test_test3: nc_var_filter_ids: nfilters=%u filterids[0]=%d\n",(unsigned)nfilters,filterids[0]);
if(nfilters != 1 && filterids[0] != FILTER_ID) {
fprintf(stderr,"test_test3: nc_var_filter_ids: failed\n");
ok = 0;
goto done;
}

/* re-read the data */
reset();
if((stat=nc_get_var(ncid,varid,(void*)array))) {
fprintf(stderr,"test_test3: nc_get_var: error = (%d)%s\n",stat,nc_strerror(stat));
ok = 0;
goto done;
}
/* verify the data */
for(i=0;i<actualproduct;i++) {
unsigned value = 0xffffffff;
if(array[i] != NULL)
sscanf(array[i],"%u",&value);
if(array[i] == NULL || i != value) {
fprintf(stderr,"test_test3: nc_get_var: value mismatch at %u\n",(unsigned)i);
ok = 0;
goto done;
}
}
nc_close(ncid);
done:
return ok;
}

Expand All @@ -129,7 +240,11 @@ test_test1(void)
static void
reset()
{
memset(array,0,sizeof(float)*actualproduct);
size_t i;
for(i=0;i<actualproduct;i++) {
if(array[i]) free(array[i]);
array[i] = NULL;
}
}

static void
Expand All @@ -153,8 +268,7 @@ init(int argc, char** argv)
}
}
/* Allocate max size */
array = (float*)calloc(1,sizeof(float)*actualproduct);
expected = (float*)calloc(1,sizeof(float)*actualproduct);
array = (char**)calloc(1,sizeof(char*)*actualproduct);
}

/**************************************************/
Expand All @@ -167,6 +281,8 @@ main(int argc, char **argv)
#endif
init(argc,argv);
if(!test_test1()) ERRR;
if(!test_test2()) ERRR;
if(!test_test3()) ERRR;
fprintf(stderr,"*** %s\n",(nerrs > 0 ? "FAILED" : "PASS"));
exit(nerrs > 0?1:0);
}
5 changes: 2 additions & 3 deletions nc_test4/tst_filter_vlen.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/bin/bash

# Test the filter install
# This cannot be run as a regular test
# because installation will not have occurred
# Test filters on non-fixed size variables.

if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
Expand Down Expand Up @@ -95,6 +93,7 @@ if test "x$TESTNCZARR" = x1 ; then
testset file
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testset zip ; fi
if test "x$FEATURE_S3TESTS" = xyes ; then testset s3 ; fi
if test "x$FEATURE_S3TESTS" = xyes ; then s3sdkdelete "/${S3ISOPATH}" ; fi # Cleanup
else
testset nc
fi

0 comments on commit db772ce

Please sign in to comment.