From 4e222bf5bb42a84378a89461fe28abd1874487c1 Mon Sep 17 00:00:00 2001
From: Allen Byrne <50328838+byrnHDF@users.noreply.github.com>
Date: Mon, 18 Mar 2024 10:36:55 -0500
Subject: [PATCH] Add filter plugin user guide text. Fix registered URL in docs
(#4169)
---
doxygen/aliases | 1 -
doxygen/dox/UsersGuide.dox | 2 +
doxygen/examples/H5.format.1.1.html | 4 +-
doxygen/examples/H5.format.2.0.html | 10 +-
doxygen/examples/H5.format.html | 8 +-
src/H5Dmodule.h | 83 +++++++--
src/H5PLmodule.h | 272 +++++++++++++++++++++++++---
src/H5Zmodule.h | 2 +-
8 files changed, 335 insertions(+), 47 deletions(-)
diff --git a/doxygen/aliases b/doxygen/aliases
index 4eb19621e74..ad868432bee 100644
--- a/doxygen/aliases
+++ b/doxygen/aliases
@@ -235,7 +235,6 @@ ALIASES += sa_metadata_ops="\sa \li H5Pget_all_coll_metadata_ops() \li H5Pget_co
################################################################################
ALIASES += ref_cons_semantics="Enabling a Strict Consistency Semantics Model in Parallel HDF5"
-ALIASES += ref_dld_filters="HDF5 Dynamically Loaded Filters"
ALIASES += ref_file_image_ops="HDF5 File Image Operations"
ALIASES += ref_filter_pipe="Data Flow Pipeline for H5Dread()"
ALIASES += ref_group_impls="Group implementations in HDF5"
diff --git a/doxygen/dox/UsersGuide.dox b/doxygen/dox/UsersGuide.dox
index 4f955e6074d..b6113ad15bd 100644
--- a/doxygen/dox/UsersGuide.dox
+++ b/doxygen/dox/UsersGuide.dox
@@ -134,6 +134,7 @@ HDF5 Release 1.14
\ref subsubsec_dataset_transfer_props
\ref subsubsec_dataset_transfer_store
\ref subsubsec_dataset_transfer_partial
+ \ref subsubsec_dataset_transfer_dyn_filter
\li \ref subsec_dataset_allocation
@@ -147,6 +148,7 @@ HDF5 Release 1.14
- \ref subsubsec_dataset_filters_nbit
- \ref subsubsec_dataset_filters_scale
- \ref subsubsec_dataset_filters_szip
+
- \ref subsubsec_dataset_filters_dyn
\ref sec_datatype
diff --git a/doxygen/examples/H5.format.1.1.html b/doxygen/examples/H5.format.1.1.html
index f5e4c4e0fe5..f437d9be970 100644
--- a/doxygen/examples/H5.format.1.1.html
+++ b/doxygen/examples/H5.format.1.1.html
@@ -5436,8 +5436,8 @@
filters requested and supported by third parties.
Filters supported by The HDF Group are documented immediately
below. Information on 3rd-party filters can be found at
-
- https://support.hdfgroup.org/services/contributions.html#filters
.
+
+ https://portal.hdfgroup.org/documentation/hdf5-docs/registered_filter_plugins.html
.
1
To request a filter identifier, please contact
diff --git a/doxygen/examples/H5.format.2.0.html b/doxygen/examples/H5.format.2.0.html
index bde030f3853..37cb7282eb1 100644
--- a/doxygen/examples/H5.format.2.0.html
+++ b/doxygen/examples/H5.format.2.0.html
@@ -12598,9 +12598,8 @@
HDF5 Library and for filters requested and supported by third
parties. Filters supported by The HDF Group are documented
immediately below. Information on 3rd-party filters can be found at
- The HDF Group’s
- Contributions page.
+ The HDF Group’s
+ Registered Filters page.
@@ -12854,9 +12853,8 @@
HDF5 Library and for filters requested and supported by third
parties. Filters supported by The HDF Group are documented
immediately below. Information on 3rd-party filters can be found at
- The HDF Group’s
- Contributions page.
+ The HDF Group’s
+ Registered Filters page.
diff --git a/doxygen/examples/H5.format.html b/doxygen/examples/H5.format.html
index 7aba5fed440..30bd37cf10f 100644
--- a/doxygen/examples/H5.format.html
+++ b/doxygen/examples/H5.format.html
@@ -14226,8 +14226,8 @@
To request a filter identifier, please contact
@@ -14488,8 +14488,8 @@
To request a filter identifier, please contact
diff --git a/src/H5Dmodule.h b/src/H5Dmodule.h
index 81f197d7147..27e5799968e 100644
--- a/src/H5Dmodule.h
+++ b/src/H5Dmodule.h
@@ -815,19 +815,10 @@
*
* Data pipeline filters
*
- * Filter |
+ * Built-in Filter |
* Description |
*
*
- * gzip compression |
- * Data compression using zlib. |
- *
- *
- * Szip compression |
- * Data compression using the Szip library. See The HDF Group website for more information
- * regarding the Szip filter. |
- *
- *
* N-bit compression |
* Data compression using an algorithm specialized for n-bit datatypes. |
*
@@ -844,6 +835,19 @@
*
* Fletcher32 |
* Fletcher32 checksum for error-detection. |
+ *
+ *
+ * Optional Built-in Filter |
+ * Description |
+ *
+ *
+ * gzip compression |
+ * Data compression using zlib. |
+ *
+ *
+ * szip compression |
+ * Data compression using the szip library. The HDF Group now uses the libaec library for the szip
+ * filter. |
*
*
*
@@ -861,6 +865,44 @@
* \li @see @ref subsubsec_dataset_filters_nbit
* \li @see @ref subsubsec_dataset_filters_scale
*
+ * \subsubsection subsubsec_dataset_transfer_dyn_filter Data Pipeline Dynamically Loaded Filters
+ * While the HDF5 “internal” compression methods work reasonably well on users’
+ * datasets, there are certain drawbacks to this implementation. First, the “internal” compression
+ * methods may not provide the optimal compression ratio, as do some newly developed or specialized
+ * compression methods. Secondly, if a data provider wants to use a “non-internal” compression for
+ * storing the data in HDF5, they have to write a filter function that uses the new compression method
+ * and then register it with the library. Data consumers of such HDF5 files will need to have the new filter
+ * function and use it with their applications to read the data, or they will need a modified version of the
+ * HDF5 Library that has the new filter as a part of the library.
+ *
+ * If a user of such data does not have a modified HDF5 Library installed on their system, command-line tools
+ * such as h5dump or h5ls will not be able to display the compressed data. Furthermore, it would be
+ * practically impossible to determine the compression method used, making the data stored in HDF5
+ * useless.
+ *
+ * It is clear that the internal HDF5 filter mechanism, while extensible, does not work well with third-party
+ * filters. It would be a maintenance nightmare to keep adding and supporting new compression methods
+ * in HDF5. For any set of HDF5 “internal” filters, there always will be data with which the “internal”
+ * filters will not achieve the optimal performance needed to address data I/O and storage problems. Thus the
+ * internal HDF5 filter mechanism is enhanced to address the issues discussed above.
+ *
+ * HDF5 provides a feature called “dynamically loaded filters in HDF5.” This feature
+ * makes the HDF5 third-party filters available to an application at runtime. The third-party HDF5 filter
+ * function has to be a part of the HDF5 filter plugin installed on the system as a shared library or DLL.
+ *
+ * To use a third-party filter, an HDF5 application should call the #H5Pset_filter function when setting the
+ * filter pipeline on a dataset creation property list. The HDF5 Library will register the filter with the
+ * library and the filter will be applied when data is written to the file.
+ *
+ * When an application reads data compressed with a third-party HDF5 filter, the HDF5 Library will search
+ * for the required filter plugin, register the filter with the library (if the filter function is not
+ * registered) and apply it to the data on the read operation.
+ *
+ * For more information,
+ * \li @see @ref sec_filter_plugins
+ *
* \subsubsection subsubsec_dataset_transfer_drive File Drivers
* I/O is performed by the HDF5 virtual file layer. The file driver interface writes and reads blocks
* of data; each driver module implements the interface using different I/O mechanisms. The table
@@ -2685,8 +2727,25 @@ allocated if necessary.
* and minimum values, and they will get a much larger minimum-bits (poor compression)
*
*
- * \subsubsection subsubsec_dataset_filters_szip Using the Szip Filter
- * See The HDF Group website for further information regarding the Szip filter.
+ * \subsubsection subsubsec_dataset_filters_szip Using the SZip Filter
+ * See The HDF Group website for further information regarding the SZip filter.
+ *
+ * \subsubsection subsubsec_dataset_filters_dyn Using Dynamically-Loadable Filters
+ * \see \ref sec_filter_plugins for further information regarding the dynamically-loadable filters.
+ *
+ * HDF has a filter plugin repository of useful third-party plugins that can be used:
+ *
+ * Filter | SetFilter Params |
+ * BLOSC | UD=32001,0,0 |
+ * BSHUF | UD=32008,0,0 |
+ * BZIP2 | UD=307,0,1,9 |
+ * JPEG | UD=32019,0,4,q,c,r,t |
+ * LZ4 | UD=32004,0,1,3 |
+ * LZF | UD=32000,1,3,0,0,0 |
+ * SZ | UD=32017,1,5,2,7,20,40,0 |
+ * ZFP | UD=32013,1,0,0 |
+ * ZSTD | UD=32015,0,0 |
+ *
*
* Previous Chapter \ref sec_group - Next Chapter \ref sec_datatype
*
diff --git a/src/H5PLmodule.h b/src/H5PLmodule.h
index 37654869746..436e8bdfa0a 100644
--- a/src/H5PLmodule.h
+++ b/src/H5PLmodule.h
@@ -27,7 +27,257 @@
#define H5_MY_PKG_ERR H5E_PLUGIN
/** \page H5PL_UG The HDF5 Plugins
- * @todo Under Construction
+ *
+ * \section sec_filter_plugins HDF5 Filter Plugins
+ *
+ * \subsection subsec_filter_plugins_intro Introduction
+ * HDF5 supports compression of data using a stackable pipeline of filters which can be
+ * implemented for reading and writing datasets, both at runtime and post‐process.
+ * These filters are supported as dynamically loadable plugins, and users can even
+ * implement custom filters of their own design.
+ *
+ * \subsection subsec_filter_plugins_model Programming Model for Applications
+ * This section describes the programming model for an application that uses a third-party HDF5 filter
+ * plugin to write or read data. For simplicity of presentation, it is assumed that the HDF5 filter plugin is
+ * available on the system in a default location. The HDF5 filter plugin is discussed in detail in the
+ * \ref subsec_filter_plugins_prog section.
+ *
+ * \subsubsection subsubsec_filter_plugins_model_apply Applying a Third-party Filter When Creating and Writing a Dataset
+ * A third-party filter can be added to the HDF5 filter pipeline by using the H5Pset_filter
+ * function, as a user would do in the past. The identification number and the filter parameters should be
+ * available to the application. For example, if the application intends to apply the HDF5 bzip2 compression
+ * filter that was registered with The HDF Group and has an identification number 307
+ * (Registered Filters), then the application would follow the steps as outlined below:
+ * \code
+ *     dcpl   = H5Pcreate (H5P_DATASET_CREATE);
+ *     status = H5Pset_filter (dcpl, (H5Z_filter_t)307, H5Z_FLAG_MANDATORY, (size_t)6, cd_values);
+ *     dset   = H5Dcreate (file, DATASET, H5T_STD_I32LE, space, H5P_DEFAULT, dcpl, H5P_DEFAULT);
+ *     status = H5Dwrite (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, wdata[0]);
+ * \endcode
+ *
+ * \subsubsection subsubsec_filter_plugins_model_read Reading Data with an Applied Third-party Filter
+ * An application does not need to do anything special to read the data with a third-party filter applied. For
+ * example, if one wants to read data written in the previous example, the following regular steps should be
+ * taken:
+ * \code
+ *     file = H5Fopen (FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
+ *     dset = H5Dopen (file, DATASET, H5P_DEFAULT);
+ *     H5Dread (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata[0]);
+ * \endcode
+ *
+ * The command-line utility h5dump, for example, will read and display the data as shown:
+ * \code
+ * HDF5 "h5ex_d_bzip2.h5" {
+ * GROUP "/" {
+ * DATASET "DS1" {
+ * DATATYPE H5T_STD_I32LE
+ * DATASPACE SIMPLE { ( 32, 64 ) / ( 32, 64 ) }
+ * STORAGE_LAYOUT {
+ * CHUNKED ( 4, 8 )
+ * SIZE 6410 (1.278:1 COMPRESSION)
+ * }
+ * FILTERS {
+ * USER_DEFINED_FILTER {
+ * FILTER_ID 307
+ * COMMENT HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html
+ * PARAMS { 2 }
+ * }
+ * }
+ * FILLVALUE {
+ * FILL_TIME H5D_FILL_TIME_IFSET
+ * VALUE H5D_FILL_VALUE_DEFAULT
+ * }
+ * ALLOCATION_TIME {
+ * H5D_ALLOC_TIME_INCR
+ * }
+ * DATA {
+ * ...
+ * }
+ * }
+ * }
+ * }
+ * \endcode
+ *
+ * If the filter cannot be loaded then h5dump will show the following:
+ * \code
+ * ...
+ * }
+ * DATA {h5dump error: unable to print data
+ * }
+ * ...
+ * \endcode
+ *
+ * \subsubsection subsubsec_filter_plugins_model_custom A Word of Caution When Using Custom Filters
+ * Data goes through the HDF5 filter pipeline only when it is written to the file or read into application
+ * memory space from the file. For example, the I/O operation is triggered with a call to #H5Fflush, or when
+ * a data item (HDF5 metadata or a raw data chunk) is evicted from the cache or brought into the cache.
+ * Please notice that #H5Dread/#H5Dwrite calls on the chunked datasets do not necessarily trigger I/O since
+ * the HDF5 Library uses a separate chunk cache.
+ *
+ * A data item may remain in the cache until the HDF5 Library is closed. If the HDF5 plugin that has to be
+ * applied to the data item becomes unavailable before the file and all objects in the file are closed, an
+ * error will occur. The following example demonstrates the issue. Please notice the position of the
+ * #H5Zunregister call:
+ *
+ * \code
+ * // Create a new group using compression.
+ * gcpl = H5Pcreate (H5P_GROUP_CREATE);
+ * status = H5Pset_filter(gcpl,H5Z_FILTER_BZIP2,H5Z_FLAG_MANDATORY,(size_t)1, cd_values);
+ * group = H5Gcreate (file, GNAME, H5P_DEFAULT, gcpl, H5P_DEFAULT);
+ * for (i=0; i < NGROUPS; i++) {
+ * sprintf(name, "group_%d", i);
+ * tmp_id = H5Gcreate (group, name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ * status = H5Gclose(tmp_id);
+ * }
+ *
+ * status = H5Pclose (gcpl);
+ * status = H5Gclose (group);
+ *
+ * // Unregister the filter. Call to H5Fclose will fail because the library tries
+ * // to apply the filter that is not available anymore. This has a cascade effect
+ * // on H5Fclose.
+ * H5Zunregister(H5Z_FILTER_BZIP2);
+ * status = H5Fclose (file);
+ * \endcode
+ *
+ * Here is an error stack produced by the program:
+ * \code
+ * HDF5-DIAG: Error detected in HDF5 (xx.yy.zz) thread 0:
+ * #000: H5F.c line **** in H5Fclose(): decrementing file ID failed
+ * major: Object atom
+ * minor: Unable to close file
+ * #001: H5I.c line **** in H5I_dec_app_ref(): can't decrement ID ref count
+ * major: Object atom
+ * minor: Unable to decrement reference count
+ * #002: H5F.c line **** in H5F_close(): can't close file
+ * major: File accessibility
+ * minor: Unable to close file
+ * ...
+ * #026: H5Z.c line **** in H5Z_find(): required filter is not registered
+ * major: Data filters
+ * minor: Object not found
+ * \endcode
+ *
+ * To avoid the problem make sure to close all objects to which the filter is applied and flush them using
+ * the H5Fflush call before unregistering the filter.
+ *
+ * \subsection subsec_filter_plugins_prog Programming Model for HDF5 Filter Plugins
+ * This section describes how to create an HDF5 filter, an HDF5 filter plugin, and how to install the HDF5
+ * plugin on the system.
+ *
+ * \subsubsection subsubsec_filter_plugins_prog_write Writing a Filter Function
+ * The HDF5 filter function for the dynamically loaded filter feature should be written as any custom filter
+ * described in Custom Filters. See the
+ * “Example” section, section 5, of that document to get an idea of the simple filter function, and see the
+ * example of the more sophisticated HDF5 bzip2 filter function in the “Building an HDF5 bzip2 Plugin Example”
+ * section. The HDF5 bzip2 filter function is also available for download from Filter Plugin Repository.
+ *
+ * The user has to remember a few things when writing an HDF5 filter function.
+ * - 1. An HDF5 filter is bidirectional.
+ * The filter handles both input and output to the file; a flag is passed to the filter to indicate the
+ * direction.
+ * - 2. An HDF5 filter operates on a buffer.
+ * The filter reads data from a buffer, performs some sort of transformation on the data, places
+ * the result in the same or new buffer, and returns the buffer pointer and size to the caller.
+ * - 3. An HDF5 filter should return zero in the case of failure.
+ *
+ * The signature of the HDF5 filter function and the accompanying filter structure (see the section below)
+ * are described in the HDF5 Reference Manual under #H5Z_func_t and #H5Z_class2_t.
+ *
+ * \subsubsection subsubsec_filter_plugins_prog_reg Registering a Filter with The HDF Group
+ * If you are writing a filter that will be used by others, it would be a good idea to request a filter
+ * identification number and register it with The HDF Group. Please follow the procedure described at
+ * Registered
+ * Filters.
+ *
+ * The HDF Group anticipates that developers of HDF5 filter plugins will not only register new filters, but
+ * will also provide links to the source code and/or binaries for the corresponding HDF5 filter plugins.
+ *
+ * It is very important for the users of the filter that developers provide filter information in the “name”
+ * field of the filter structure, for example:
+ * \code
+ * const H5Z_class2_t H5Z_BZIP2[1] = {{
+ * H5Z_CLASS_T_VERS, // H5Z_class_t version
+ * (H5Z_filter_t)H5Z_FILTER_BZIP2, // Filter id number
+ * 1, // encoder_present flag (set to true)
+ * 1, // decoder_present flag (set to true)
+ * "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html",
+ * // Filter name for debugging
+ * NULL, // The "can apply" callback
+ * NULL, // The "set local" callback
+ * (H5Z_func_t)H5Z_filter_bzip2, // The actual filter function
+ * }};
+ * \endcode
+ *
+ * The HDF5 Library and command-line tools have access to the “name” field. An application can
+ * use the H5Pget_filter<*> functions to retrieve information about the filters.
+ *
+ * Using the example of the structure above, the h5dump tool will print the string “HDF5 bzip2
+ * filter found at …” pointing users to the applied filter (see the example in the \ref
+ * subsubsec_filter_plugins_model_read section) thus solving the problem of the filter’s origin.
+ *
+ * \subsubsection subsubsec_filter_plugins_prog_create Creating an HDF5 Filter Plugin
+ * The HDF5 filter plugin source should include:
+ * - 1. The H5PLextern.h header file from the HDF5 distribution.
+ * - 2. The definition of the filter structure (see the example shown in the section above).
+ * - 3. The filter function (for example, H5Z_filter_bzip2).
+ * - 4. The two functions necessary for the HDF5 Library to find the correct type of the plugin library
+ * while loading it at runtime and to get information about the filter function:
+ *
+ *      \code
+ *      H5PL_type_t H5PLget_plugin_type(void);
+ *      const void* H5PLget_plugin_info(void);
+ *      \endcode
+ *      Here is an example of the functions above for the HDF5 bzip2 filter:
+ *      \code
+ *      H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;}
+ *      const void* H5PLget_plugin_info(void) {return H5Z_BZIP2;}
+ *      \endcode
+ *
+ * - 5. Other functions such as the source of the compression library may also be included.
+ *
+ * Build the HDF5 filter plugin as a shared library. The following steps should be taken:
+ * - 1. When compiling, point to the HDF5 header files.
+ * - 2. Use the appropriate linking flags.
+ * - 3. Link with any required external libraries. For example, if libbz2.so is installed on a Linux
+ *      system, the HDF5 bzip2 plugin library libH5Zbzip2.so may be linked with libbz2.so instead of
+ *      including bzip2 source into the plugin library.
+ * The complete example of the HDF5 bzip2 plugin library is provided at
+ * BZIP2 Filter Plugin
+ * and can be adapted for other plugins.
+ *
+ * \subsubsection subsubsec_filter_plugins_prog_install Installing an HDF5 Filter Plugin
+ * The default directory for an HDF5 filter plugin library is defined on UNIX-like systems as
+ * \code
+ * "/usr/local/hdf5/lib/plugin"
+ * \endcode
+ * and on Windows systems as
+ * \code
+ * "%ALLUSERSPROFILE%/hdf5/lib/plugin"
+ * \endcode
+ *
+ * The default path can be overridden by a user with the \c HDF5_PLUGIN_PATH environment variable.
+ * Several directories can be specified for the search path using “:” as a path separator for UNIX-like
+ * systems and “;” for Windows.
+ *
+ * Readers are encouraged to try the example in the “Building an HDF5 bzip2 Plugin Example” section.
+ *
+ * \subsection subsec_filter_plugins_design Design
+ * Dynamic loading of the HDF5 filter plugin (or filter library) is triggered only by two events: when an
+ * application calls the #H5Pset_filter function to set the filter for the first time, or when the data to
+ * which the filter is applied is read for the first time.
+ *
+ * \subsection subsec_filter_plugins_build Building an HDF5 bzip2 Plugin Example
+ * The HDF Group provides a repository of HDF5 filter plugins that can be checked out from
+ * BZIP2 Filter Plugin.
+ *
+ * It contains the source code for the bzip2
+ * plugin library and an example that uses the plugin. It requires the HDF5 Library with the dynamically
+ * loaded feature and the bzip2 library being available on the system.
+ * The plugin and the example can be built using configure or CMake commands. For instructions on how
+ * to build with CMake, see the README.txt file in the source code distribution. The bzip2 library that can
+ * be built with CMake is available from:
+ * \code
+ * GIT_URL: "https://github.com/libarchive/bzip2.git"
+ * GIT_BRANCH: "master"
+ * \endcode
+ *
+ * See the documentation in the
+ * hdf5_plugins/docs folder. In
+ * particular:
+ * INSTALL_With_CMake
+ * USING_HDF5_AND_CMake
*/
/**
@@ -36,26 +286,6 @@
* Use the functions in this module to manage the loading behavior of HDF5
* plugins.
*
- *
- * Create | Read |
- *
- *
- * \snippet H5PL_examples.c create
- * |
- *
- * \snippet H5PL_examples.c read
- * |
- *
Update | Delete |
- *
- *
- * \snippet H5PL_examples.c update
- * |
- *
- * \snippet H5PL_examples.c delete
- * |
- *
- *
- *
* \attention The loading behavior of HDF5 plugins can be controlled via the
* functions described below and certain environment variables, such
* as \c HDF5_PLUGIN_PRELOAD and \c HDF5_PLUGIN_PATH.
diff --git a/src/H5Zmodule.h b/src/H5Zmodule.h
index 8b1a0dedc4e..c7f8300fb7c 100644
--- a/src/H5Zmodule.h
+++ b/src/H5Zmodule.h
@@ -102,7 +102,7 @@
* Custom filters that have been registered with the library will have
* additional unique identifiers.
*
- * See \ref_dld_filters for more information on how an HDF5 application can
+ * See \ref sec_filter_plugins for more information on how an HDF5 application can
* apply a filter that is not registered with the HDF5 library.
*
* \defgroup H5ZPRE Predefined Filters