From 8bb1ed8940880d33f4f9c2ea9b6072abd57219ef Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Tue, 16 Jul 2024 13:37:52 +0200 Subject: [PATCH 1/6] Bump test version to 1.14.4.3 (#1028) * add a ubuntu 24.04 CI --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66069123d..c80e691fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,6 +58,9 @@ jobs: - config: os: ubuntu-22.04 flags: '-DHIGHFIVE_TEST_BOOST=Off -DCMAKE_CXX_STANDARD=20 -DHIGHFIVE_HAS_CONCEPTS=On' + - config: + os: ubuntu-24.04 + flags: '-DHIGHFIVE_TEST_BOOST=Off -DCMAKE_CXX_STANDARD=20 -DHIGHFIVE_HAS_CONCEPTS=On' steps: - uses: actions/checkout@v3 @@ -96,7 +99,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - hdf5_version : [ hdf5-1_8_23, hdf5-1_10_11, hdf5-1_12_3, hdf5_1.14.4.1 ] + hdf5_version : [ hdf5-1_8_23, hdf5-1_10_11, hdf5-1_12_3, hdf5_1.14.4.3 ] steps: - uses: actions/checkout@v3 From 8f00f1c30b3a3bae6fb21a8afc1a8042f024baad Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 25 Jul 2024 16:18:37 +0200 Subject: [PATCH 2/6] Fix indentation. (#1029) --- tests/unit/test_high_five_selection.cpp | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/unit/test_high_five_selection.cpp b/tests/unit/test_high_five_selection.cpp index e3b91e4cc..586af0b17 100644 --- a/tests/unit/test_high_five_selection.cpp +++ b/tests/unit/test_high_five_selection.cpp @@ -415,33 +415,33 @@ std::vector make_irregular_hyperslab_test_data() { // xor, irregular auto slab_ab_xor = HyperSlab(slabs["a"]) ^ slabs["b"]; // clang-format off - auto answer_ab_xor = IrregularHyperSlabAnswer{{ - {1ul, 1ul}, {1ul, 2ul}, - {2ul, 0ul}, {2ul, 2ul}, - {3ul, 1ul}, {3ul, 2ul} - }}; + auto answer_ab_xor = IrregularHyperSlabAnswer{{ + {1ul, 1ul}, {1ul, 2ul}, + {2ul, 0ul}, {2ul, 2ul}, + {3ul, 1ul}, {3ul, 2ul} + }}; // clang-format on test_data.push_back({"a xor b", slab_ab_xor, answer_ab_xor}); // (not a) and e, irregular auto slab_ab_nota = HyperSlab(slabs["a"]).notA(slabs["b"]); // clang-format off - auto answer_ab_nota = IrregularHyperSlabAnswer{{ - {1ul, 1ul}, {1ul, 2ul}, - {2ul, 2ul}, - {3ul, 1ul}, {3ul, 2ul} - }}; + auto answer_ab_nota = IrregularHyperSlabAnswer{{ + {1ul, 1ul}, {1ul, 2ul}, + {2ul, 2ul}, + {3ul, 1ul}, {3ul, 2ul} + }}; // clang-format on test_data.push_back({"a nota b", slab_ab_nota, answer_ab_nota}); // (not a) and e, irregular auto slab_ba_notb = HyperSlab(slabs["b"]).notB(slabs["a"]); // clang-format off - auto answer_ba_notb = IrregularHyperSlabAnswer{{ - {1ul, 1ul}, {1ul, 2ul}, - {2ul, 2ul}, - {3ul, 1ul}, {3ul, 2ul} - }}; + auto answer_ba_notb = IrregularHyperSlabAnswer{{ + {1ul, 1ul}, {1ul, 2ul}, + {2ul, 2ul}, + {3ul, 1ul}, {3ul, 2ul} + }}; // clang-format on test_data.push_back({"b notb a", slab_ba_notb, answer_ba_notb}); From 62bef2ff5799262f69e19c7180d932a36d7fbb99 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Fri, 26 Jul 2024 10:28:07 +0200 Subject: [PATCH 3/6] Add private DataSpace (named) ctor `fromId`. (#1030) Certain HDF5 API creates data spaces, in order to use this API, we need to be able to create `HighFive::DataSpace` from an HID. --- include/highfive/H5DataSpace.hpp | 17 +++++++++++++++++ include/highfive/bits/H5Dataspace_misc.hpp | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/include/highfive/H5DataSpace.hpp b/include/highfive/H5DataSpace.hpp index 463648507..6c98d4c99 100644 --- a/include/highfive/H5DataSpace.hpp +++ b/include/highfive/H5DataSpace.hpp @@ -19,6 +19,14 @@ namespace HighFive { +namespace detail { +/// @brief Create a HighFive::DataSpace from an HID, without incrementing the id. +/// +/// @note This is internal API and subject to change. +/// @internal +DataSpace make_data_space(hid_t hid); +} // namespace detail + /// \brief Class representing the space (dimensions) of a DataSet /// /// \code{.cpp} @@ -254,9 +262,18 @@ class DataSpace: public Object { protected: DataSpace() = default; + static DataSpace fromId(hid_t hid) { + DataSpace space; + space._hid = hid; + + return space; + } + friend class Attribute; friend class File; friend class DataSet; + + friend DataSpace detail::make_data_space(hid_t hid); }; } // namespace HighFive diff --git a/include/highfive/bits/H5Dataspace_misc.hpp b/include/highfive/bits/H5Dataspace_misc.hpp index 4382c14c1..21ec6397e 100644 --- a/include/highfive/bits/H5Dataspace_misc.hpp +++ b/include/highfive/bits/H5Dataspace_misc.hpp @@ -21,6 +21,12 @@ namespace HighFive { +namespace detail { +inline DataSpace make_data_space(hid_t hid) { + return DataSpace::fromId(hid); +} +} // namespace detail + inline DataSpace::DataSpace(const std::vector& dims) : DataSpace(dims.begin(), dims.end()) {} From 8145c27b008a2392c26b32f1ca298affc78e01b4 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Fri, 26 Jul 2024 10:42:57 +0200 Subject: [PATCH 4/6] Remove pointless `static`. (#1035) --- tests/unit/data_generator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/data_generator.hpp b/tests/unit/data_generator.hpp index e4e7dfa85..d8342b634 100644 --- a/tests/unit/data_generator.hpp +++ b/tests/unit/data_generator.hpp @@ -68,7 +68,7 @@ std::vector unravel(size_t flat_index, const Dims& dims) { } template -static size_t flat_size(const Dims& dims) { +size_t flat_size(const Dims& dims) { size_t n = 1; for (auto d: dims) { n *= d; From e9492c10c4e7b8a4a2b57f3f2bf451b2bf843e30 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Fri, 26 Jul 2024 14:02:37 +0200 Subject: [PATCH 5/6] Optimize chained hyperslab selection. (#1031) A common pattern for creating semi-unstructured selection is to use many (small) RegularHyperSlab and chain them: ``` HyperSlab hyperslab; for(auto slab : regular_hyper_slabs) { hyperslab |= slab; } ``` This eventually triggers calling: ``` for(auto slab : regular_hyper_slabs) { auto [offset, stride, counts, blocks] = slab; H5Sselect_hyperslab(space_id, offset, stride, counts, block); } ``` Measurements show that this has runtime that's quadratic in the number of regular hyper slabs. This starts becoming prohibitive at 10k - 40k slabs. We noticed that `H5Scombine_select` does not suffer from the same performance issue. This allows us to optimize (long) chain of `Op::Or` using divide and conquer. The current implementation only optimizes streaks of `Op::Or`. --- include/highfive/bits/H5Slice_traits.hpp | 122 ++++++++++++++++++++--- include/highfive/bits/h5s_wrapper.hpp | 21 ++++ tests/unit/test_high_five_selection.cpp | 57 +++++++++++ 3 files changed, 186 insertions(+), 14 deletions(-) diff --git a/include/highfive/bits/H5Slice_traits.hpp b/include/highfive/bits/H5Slice_traits.hpp index 6812a0914..ed10aa23c 100644 --- a/include/highfive/bits/H5Slice_traits.hpp +++ b/include/highfive/bits/H5Slice_traits.hpp @@ -162,20 +162,7 @@ class HyperSlab { } DataSpace apply(const DataSpace& space_) const { - auto space = space_.clone(); - for (const auto& sel: selects) { - if (sel.op == Op::None) { - detail::h5s_select_none(space.getId()); - } else { - detail::h5s_select_hyperslab(space.getId(), - convert(sel.op), - sel.offset.empty() ? nullptr : sel.offset.data(), - sel.stride.empty() ? nullptr : sel.stride.data(), - sel.count.empty() ? nullptr : sel.count.data(), - sel.block.empty() ? nullptr : sel.block.data()); - } - } - return space; + return apply_impl(space_); } private: @@ -229,6 +216,113 @@ class HyperSlab { }; std::vector selects; + + protected: + DataSpace select_none(const DataSpace& outer_space) const { + auto space = outer_space.clone(); + detail::h5s_select_none(space.getId()); + return space; + } + + void select_hyperslab(DataSpace& space, const Select_& sel) const { + detail::h5s_select_hyperslab(space.getId(), + convert(sel.op), + sel.offset.empty() ? nullptr : sel.offset.data(), + sel.stride.empty() ? nullptr : sel.stride.data(), + sel.count.empty() ? nullptr : sel.count.data(), + sel.block.empty() ? nullptr : sel.block.data()); + } + +#if H5_VERSION_GE(1, 10, 6) + /// The length of a stream of `Op::Or` starting at `begin`. + size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const { + assert(op == Op::Or); + auto it = std::find_if(begin, end, [op](const Select_& sel) { return sel.op != op; }); + return static_cast(it - begin); + } + + DataSpace combine_selections(const DataSpace& left_space, + Op op, + const DataSpace& right_space) const { + return detail::make_data_space( + H5Scombine_select(left_space.getId(), convert(op), right_space.getId())); + } + + /// Reduce a sequence of `Op::Or` efficiently. + /// + /// The issue is that `H5Sselect_hyperslab` runs in time that linear of the + /// number of block in the existing selection. Therefore, a loop that adds + /// slab-by-slab has quadratic runtime in the number of slabs. + /// + /// Fortunately, `H5Scombine_select` doesn't suffer from the same problem. + /// However, it's only available in 1.10.6 and newer. + /// + /// The solution is to use divide-and-conquer to reduce (long) streaks of + /// `Op::Or` in what seems to be log-linear time. + DataSpace reduce_streak(const DataSpace& outer_space, + Select_ const* begin, + Select_ const* end, + Op op) const { + assert(op == Op::Or); + + if (begin == end) { + throw std::runtime_error("Broken logic in 'DataSpace::reduce_streak'."); + } + + std::ptrdiff_t distance = end - begin; + if (distance == 1) { + auto space = select_none(outer_space); + select_hyperslab(space, *begin); + return space; + } + + Select_ const* mid = begin + distance / 2; + auto right_space = reduce_streak(outer_space, begin, mid, op); + auto left_space = reduce_streak(outer_space, mid, end, op); + + return combine_selections(left_space, op, right_space); + } + + DataSpace apply_impl(const DataSpace& space_) const { + auto space = space_.clone(); + auto n_selects = selects.size(); + for (size_t i = 0; i < n_selects; ++i) { + auto begin = selects.data() + i; + auto end = selects.data() + n_selects; + + auto n_ors = detect_streak(begin, end, Op::Or); + + if (n_ors > 1) { + auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or); + // Since HDF5 doesn't allow `combine_selections` with a None + // selection, we need to avoid the issue: + if (detail::h5s_get_select_type(space.getId()) == H5S_SEL_NONE) { + space = right_space; + } else { + space = combine_selections(space, Op::Or, right_space); + } + i += n_ors - 1; + } else if (selects[i].op == Op::None) { + detail::h5s_select_none(space.getId()); + } else { + select_hyperslab(space, selects[i]); + } + } + return space; + } +#else + DataSpace apply_impl(const DataSpace& space_) const { + auto space = space_.clone(); + for (const auto& sel: selects) { + if (sel.op == Op::None) { + detail::h5s_select_none(space.getId()); + } else { + select_hyperslab(space, sel); + } + } + return space; + } +#endif }; /// diff --git a/include/highfive/bits/h5s_wrapper.hpp b/include/highfive/bits/h5s_wrapper.hpp index 03edf8005..250ffcf6d 100644 --- a/include/highfive/bits/h5s_wrapper.hpp +++ b/include/highfive/bits/h5s_wrapper.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include namespace HighFive { namespace detail { @@ -110,6 +111,26 @@ inline H5S_class_t h5s_get_simple_extent_type(hid_t space_id) { return cls; } +inline H5S_sel_type h5s_get_select_type(hid_t space_id) { + H5S_sel_type type = H5Sget_select_type(space_id); + if (type < 0) { + HDF5ErrMapper::ToException("Unable to get type of selection."); + } + + return type; +} + +#if H5_VERSION_GE(1, 10, 6) +inline hid_t h5s_combine_select(hid_t space1_id, H5S_seloper_t op, hid_t space2_id) { + auto space_id = H5Scombine_select(space1_id, op, space2_id); + if (space_id == H5I_INVALID_HID) { + HDF5ErrMapper::ToException("Unable to combine two selections."); + } + + return space_id; +} +#endif + } // namespace detail } // namespace HighFive diff --git a/tests/unit/test_high_five_selection.cpp b/tests/unit/test_high_five_selection.cpp index 586af0b17..1d6006f4c 100644 --- a/tests/unit/test_high_five_selection.cpp +++ b/tests/unit/test_high_five_selection.cpp @@ -25,6 +25,7 @@ #include #include "tests_high_five.hpp" +#include "data_generator.hpp" using namespace HighFive; using Catch::Matchers::Equals; @@ -534,3 +535,59 @@ void irregularHyperSlabSelectionWriteTest() { TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::tuple) { irregularHyperSlabSelectionWriteTest(); } + +TEST_CASE("select_multiple_ors", "[hyperslab]") { + size_t n = 100, m = 20; + size_t nsel = 30; + auto x = testing::DataGenerator>>::create({n, m}); + + auto file = File("select_multiple_ors.h5", File::Truncate); + auto dset = file.createDataSet("x", x); + + std::vector> indices; + auto hyperslab = HyperSlab(); + for (size_t i = 0; i < nsel; ++i) { + std::vector offsets{i, i % 10}; + std::vector counts{1, 3}; + hyperslab |= RegularHyperSlab(offsets, counts); + + for (size_t k = 0; k < counts[1]; ++k) { + indices.push_back({offsets[0], offsets[1] + k}); + } + } + + SECTION("Pure Or Chain") { + auto selected = dset.select(hyperslab).read>(); + REQUIRE(selected.size() == indices.size()); + for (size_t k = 0; k < selected.size(); ++k) { + size_t i = indices[k][0]; + size_t j = indices[k][1]; + REQUIRE(selected[k] == x[i][j]); + } + } + + SECTION("Or Chain And Slab") { + std::vector offsets{5, 2}; + std::vector counts{85, 12}; + + std::vector> selected_indices; + for (const auto ij: indices) { + std::array ij_max = {offsets[0] + counts[0], offsets[1] + counts[1]}; + + if (offsets[0] <= ij[0] && ij[0] < ij_max[0] && offsets[1] <= ij[1] && + ij[1] < ij_max[1]) { + selected_indices.push_back(ij); + } + } + + hyperslab &= RegularHyperSlab(offsets, counts); + + auto selected = dset.select(hyperslab).read>(); + REQUIRE(selected.size() == selected_indices.size()); + for (size_t k = 0; k < selected.size(); ++k) { + size_t i = selected_indices[k][0]; + size_t j = selected_indices[k][1]; + REQUIRE(selected[k] == x[i][j]); + } + } +} From a6e17bdf4ae47801ffa44e768b86d42ce09a8e97 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Tue, 20 Aug 2024 09:40:32 +0200 Subject: [PATCH 6/6] Fix corner cases of combine_selection. (#1038) --- include/highfive/bits/H5Slice_traits.hpp | 29 +++++-- tests/unit/test_high_five_selection.cpp | 102 ++++++++++++++++++++--- 2 files changed, 111 insertions(+), 20 deletions(-) diff --git a/include/highfive/bits/H5Slice_traits.hpp b/include/highfive/bits/H5Slice_traits.hpp index ed10aa23c..bf77b50e9 100644 --- a/include/highfive/bits/H5Slice_traits.hpp +++ b/include/highfive/bits/H5Slice_traits.hpp @@ -244,8 +244,25 @@ class HyperSlab { DataSpace combine_selections(const DataSpace& left_space, Op op, const DataSpace& right_space) const { - return detail::make_data_space( - H5Scombine_select(left_space.getId(), convert(op), right_space.getId())); + assert(op == Op::Or); + + auto left_type = detail::h5s_get_select_type(left_space.getId()); + auto right_type = detail::h5s_get_select_type(right_space.getId()); + + // Since HDF5 doesn't allow `combine_selections` with a None + // selection, we need to avoid the issue: + if (left_type == H5S_SEL_NONE) { + return right_space; + } else if (right_type == H5S_SEL_NONE) { + return left_space; + } else if (left_type == H5S_SEL_ALL) { + return left_space; + } else if (right_type == H5S_SEL_ALL) { + return right_space; + } else { + return detail::make_data_space( + detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId())); + } } /// Reduce a sequence of `Op::Or` efficiently. @@ -294,13 +311,7 @@ class HyperSlab { if (n_ors > 1) { auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or); - // Since HDF5 doesn't allow `combine_selections` with a None - // selection, we need to avoid the issue: - if (detail::h5s_get_select_type(space.getId()) == H5S_SEL_NONE) { - space = right_space; - } else { - space = combine_selections(space, Op::Or, right_space); - } + space = combine_selections(space, Op::Or, right_space); i += n_ors - 1; } else if (selects[i].op == Op::None) { detail::h5s_select_none(space.getId()); diff --git a/tests/unit/test_high_five_selection.cpp b/tests/unit/test_high_five_selection.cpp index 1d6006f4c..eb27bd6a9 100644 --- a/tests/unit/test_high_five_selection.cpp +++ b/tests/unit/test_high_five_selection.cpp @@ -536,6 +536,17 @@ TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::t irregularHyperSlabSelectionWriteTest(); } +void check_selected(const std::vector& selected, + const std::vector>& indices, + const std::vector>& x) { + REQUIRE(selected.size() == indices.size()); + for (size_t k = 0; k < selected.size(); ++k) { + size_t i = indices[k][0]; + size_t j = indices[k][1]; + REQUIRE(selected[k] == x[i][j]); + } +} + TEST_CASE("select_multiple_ors", "[hyperslab]") { size_t n = 100, m = 20; size_t nsel = 30; @@ -558,12 +569,7 @@ TEST_CASE("select_multiple_ors", "[hyperslab]") { SECTION("Pure Or Chain") { auto selected = dset.select(hyperslab).read>(); - REQUIRE(selected.size() == indices.size()); - for (size_t k = 0; k < selected.size(); ++k) { - size_t i = indices[k][0]; - size_t j = indices[k][1]; - REQUIRE(selected[k] == x[i][j]); - } + check_selected(selected, indices, x); } SECTION("Or Chain And Slab") { @@ -583,11 +589,85 @@ TEST_CASE("select_multiple_ors", "[hyperslab]") { hyperslab &= RegularHyperSlab(offsets, counts); auto selected = dset.select(hyperslab).read>(); - REQUIRE(selected.size() == selected_indices.size()); - for (size_t k = 0; k < selected.size(); ++k) { - size_t i = selected_indices[k][0]; - size_t j = selected_indices[k][1]; - REQUIRE(selected[k] == x[i][j]); + check_selected(selected, selected_indices, x); + } +} + +TEST_CASE("select_multiple_ors_edge_cases", "[hyperslab]") { + size_t n = 100, m = 20; + + auto x = testing::DataGenerator>>::create({n, m}); + auto file = File("select_multiple_ors_edge_cases.h5", File::Truncate); + auto dset = file.createDataSet("x", x); + + std::vector> indices; + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < m; ++j) { + indices.push_back({i, j}); } } + + auto space = DataSpace({n, m}); + SECTION("complete |= ") { + auto hyperslab = HyperSlab(RegularHyperSlab({0, 0}, {n, m})); + + // Select everything; and then redundantly some parts again. + hyperslab &= RegularHyperSlab({0, 0}, {n, m}); + hyperslab |= RegularHyperSlab({0, 0}, {n, m / 2}); + hyperslab |= RegularHyperSlab({3, 0}, {1, 3}); + hyperslab |= RegularHyperSlab({6, 0}, {1, 3}); + hyperslab.apply(space); + + auto selected = dset.select(hyperslab).read>(); + check_selected(selected, indices, x); + } + + SECTION("complete &=") { + auto hyperslab = HyperSlab(); + + // With detours, select everything, then reduce it to the first 2 + // elements. + hyperslab |= RegularHyperSlab({0, 0}, {n, m / 2}); + hyperslab |= RegularHyperSlab({0, 0}, {n, m / 2}); + hyperslab |= RegularHyperSlab({0, m / 2}, {n, m - m / 2}); + hyperslab |= RegularHyperSlab({0, 0}, {n, m}); + hyperslab &= RegularHyperSlab({0, 0}, {1, 2}); + hyperslab.apply(space); + + indices = {{0, 0}, {0, 1}}; + + auto selected = dset.select(hyperslab).read>(); + check_selected(selected, indices, x); + } + + SECTION("empty |=") { + auto hyperslab = HyperSlab(RegularHyperSlab({0, 0}, {n, m})); + hyperslab &= RegularHyperSlab({0, 0}, {1, 2}); + hyperslab &= RegularHyperSlab({3, 0}, {1, 2}); + + hyperslab |= RegularHyperSlab({0, 0}, {n, m / 2}); + hyperslab |= RegularHyperSlab({0, 0}, {n, m / 2}); + hyperslab |= RegularHyperSlab({0, m / 2}, {n, m - m / 2}); + hyperslab |= RegularHyperSlab({0, 0}, {n, m}); + + hyperslab.apply(space); + + auto selected = dset.select(hyperslab).read>(); + check_selected(selected, indices, x); + } + + SECTION("|= empty") { + auto hyperslab = HyperSlab(); + + hyperslab |= RegularHyperSlab({0, 0}, {1, 2}); + hyperslab |= RegularHyperSlab({0, 0}, {1, 2}); + hyperslab |= RegularHyperSlab({0, 0}, {0, 0}); + + hyperslab.apply(space); + + indices = {{0, 0}, {0, 1}}; + + auto selected = dset.select(hyperslab).read>(); + check_selected(selected, indices, x); + } }