garymm · garymm · Sep 17, 2023 · Jun 29, 2023
diff --git a/README.md b/README.md
@@ -36,19 +36,22 @@ Otherwise, copy the clangd args from the [.vscode/settings.json](.vscode/setting
 ### Done
 
 * Build Huffman code tables from given symbol frequencies.
+* Huffman decoding with C++ std lib.
 
 ### TODO
 
-* Implement Huffman decompression with C++ std lib.
-* Set up bazel build of OpenSYCL with OpenMP.
-* Port Huffman decompression to SYCL.
-* Implement LZ77 with C++ std lib.
-* Port LZ77 to SYCL.
-* Implement Deflate decompression.
-* Try building it for a GPU.
+* Get SYCL building with Bazel. OpenSYCL already builds for CPU only [here](https://github.com/garymm/xpu).
+  It would be nicer to use [Intel's LLVM](https://github.com/intel/llvm), which supports many GPUs.
+* (maybe?) Implement LZ77 with C++ std lib.
+* Implement Deflate decompression with C++ std lib.
+* Port Deflate to SYCL.
+* Benchmark it on CPU.
+* Build system work to get it to run on GPU.
+* Benchmark it on GPU.
 
 ## References
 
 * [DEFLATE Compressed Data Format Specification version 1.3](https://tools.ietf.org/html/rfc1951)
+* [pyflate](https://github.com/garymm/pyflate)
 * [An Explanation of the Deflate Algorithm](https://zlib.net/feldspar.html)
 * [LZ77 Specification](https://www.cs.duke.edu/courses/spring03/cps296.5/papers/ziv_lempel_1977_universal_algorithm.pdf)
diff --git a/huffman/BUILD.bazel b/huffman/BUILD.bazel
@@ -6,6 +6,7 @@ cc_library(
         "src/bit.hpp",
         "src/bit_span.hpp",
         "src/code.hpp",
+        "src/decode.hpp",
         "src/detail/base_view.hpp",
         "src/detail/iterator_interface.hpp",
         "src/detail/static_vector.hpp",

diff --git a/huffman/huffman.hpp b/huffman/huffman.hpp
@@ -3,5 +3,6 @@
 #include "huffman/src/bit.hpp"
 #include "huffman/src/bit_span.hpp"
 #include "huffman/src/code.hpp"
+#include "huffman/src/decode.hpp"
 #include "huffman/src/encoding.hpp"
 #include "huffman/src/table.hpp"
diff --git a/huffman/src/bit_span.hpp b/huffman/src/bit_span.hpp
@@ -9,6 +9,7 @@
 #include <cstdint>
 #include <iterator>
 #include <limits>
+#include <ranges>
 
 namespace gpu_deflate::huffman {
 /// A non-owning span of bits. Allows for iteration over the individual bits.
@@ -76,10 +77,19 @@ class bit_span
   ///
   /// @param data a pointer to the first byte of the data.
   /// @param bit_size the number of bits in the data.
-  constexpr bit_span(const std::byte* data, size_t bit_size)
+  constexpr bit_span(const std::byte* data, std::size_t bit_size)
       : data_(data), bit_size_(bit_size)
   {}
 
+  template <std::ranges::contiguous_range R>
+    requires std::ranges::borrowed_range<R>
+  // TODO: remove cppcoreguidelines-pro-type-member-init once
+  // https://reviews.llvm.org/D157367 is in our toolchain.
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,bugprone-forwarding-reference-overload)
+  constexpr bit_span(R&& r)
+      : bit_span(std::ranges::data(r), std::ranges::size(r) * CHAR_BIT)
+  {}  // NOTE(review): bit size is element count * CHAR_BIT, so elements are presumably single bytes
+
   [[nodiscard]]
   constexpr auto begin() const -> iterator
   {

diff --git a/huffman/src/code.hpp b/huffman/src/code.hpp
@@ -86,6 +86,21 @@ class code
     return std::move(c);
   }
 
+  /// Right pad `c` with `b`, i.e. append `b` as the new least significant bit
+  ///
+  friend constexpr auto operator<<(code& c, bit b) -> code&
+  {
+    c.value_ <<= 1U;  // make room for `b` in the lowest bit
+    c.value_ |= static_cast<std::size_t>(bool(b));
+    ++c.bitsize_;
+    return c;
+  }
+  friend constexpr auto operator<<(code&& c, bit b) -> code&&
+  {
+    c << b;  // `c` is an lvalue here, so this calls the overload above
+    return std::move(c);
+  }
+
   /// Inserts a textual representation of `c` into `os`
   ///
   friend auto operator<<(std::ostream& os, const code& c) -> std::ostream&

diff --git a/huffman/src/decode.hpp b/huffman/src/decode.hpp
@@ -0,0 +1,48 @@
+#pragma once
+#include "huffman/src/bit_span.hpp"
+#include "huffman/src/code.hpp"
+#include "huffman/src/table.hpp"
+
+#include <iterator>
+
+namespace gpu_deflate::huffman {
+/// Decodes a bit stream using a code table.
+///
+/// If a code from @p bits is not found in @p code_table, the decoding returns
+/// immediately without reading remaining @p bits. Bits left over when @p bits
+/// ends before a symbol is matched produce no output.
+///
+/// @tparam Symbol The type of the symbols in the code table.
+/// @tparam Extent The extent of the code table.
+/// @tparam O The type of the output iterator.
+/// @param code_table The code table to use for decoding.
+/// @param bits The bit stream to decode.
+/// @param output The output iterator to write the decoded symbols to.
+/// @returns The output iterator after writing the decoded symbols.
+template <
+    std::regular Symbol,
+    std::size_t Extent = std::dynamic_extent,
+    std::output_iterator<Symbol> O>
+constexpr auto
+decode(const table<Symbol, Extent>& code_table, bit_span bits, O output) -> O
+{
+  code current_code{};
+  auto code_table_pos = code_table.begin();
+  for (auto bit : bits) {
+    current_code << bit;  // extend the candidate code by one bit
+    auto found = code_table.find(current_code, code_table_pos);
+    if (found) {
+      *output = (*found)->symbol;
+      output++;
+      code_table_pos = code_table.begin();  // start over for the next symbol
+      current_code = code{};
+      continue;
+    }
+    if (found.error() == code_table.end()) {  // NOTE(review): presumably no entry can match anymore - confirm in table::find
+      break;
+    }
+    code_table_pos = found.error();  // presumably where the next lookup should resume - confirm in table::find
+  }
+  return output;
+}
+}  // namespace gpu_deflate::huffman
diff --git a/huffman/test/BUILD.bazel b/huffman/test/BUILD.bazel
@@ -70,6 +70,16 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "decode_test",
+    timeout = "short",
+    srcs = ["decode_test.cpp"],
+    deps = [
+        "//huffman",
+        "@boost_ut",
+    ],
+)
+
 cc_binary(
     name = "bench",
     srcs = ["bench.cpp"],

diff --git a/huffman/test/decode_test.cpp b/huffman/test/decode_test.cpp
@@ -0,0 +1,58 @@
+#include "huffman/huffman.hpp"
+
+#include <boost/ut.hpp>
+
+#include <array>
+#include <climits>
+#include <cstddef>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+auto main() -> int
+{
+  using ::boost::ut::expect;
+  using ::boost::ut::test;
+
+  namespace huffman = ::gpu_deflate::huffman;
+  using namespace huffman::literals;
+
+  test("basic") = [] {
+    // encoded data from dahuffman readme.rst, but in hex.
+    constexpr std::array<std::byte, 6> encoded_bytes = {
+        std::byte{0x86},
+        std::byte{0x7c},
+        std::byte{0x25},
+        std::byte{0x13},
+        std::byte{0x69},
+        std::byte{0x40}};
+
+    constexpr char eot = {'\4'};
+    static constexpr auto code_table =  // clang-format off
+      huffman::table{
+        huffman::table_contents,
+        {std::pair{00000_c, eot},
+                  {00001_c, 'x'},
+                  {0001_c,  'q'},
+                  {001_c,   'n'},
+                  {01_c,    'i'},
+                  {1_c,     'e'}}
+      };  // clang-format on
+
+    constexpr std::array expected = {
+        'e', 'x', 'e', 'n', 'e', 'e', 'e', 'e', 'x', 'n',
+        'i', 'q', 'n', 'e', 'i', 'e', 'i', 'n', 'i', eot,
+    };
+    constexpr auto output_buf = [&] {
+      std::array<char, expected.size()> output_buf{};
+      auto result = decode(code_table, encoded_bytes, output_buf.begin());
+      // result should equal output_buf.end(), i.e. every slot was written.
+      if (output_buf.end() != result) {
+        throw std::runtime_error("assertion failed");
+      }
+      return output_buf;
+    }();
+
+    static_assert(output_buf == expected);
+  };
+}