Skip to content

Commit

Permalink
huffman decode (#69)
Browse files Browse the repository at this point in the history
Change-Id: I97309a7fd3c7059fe439fe516ba97920b53b5fcd
  • Loading branch information
garymm committed Sep 17, 2023
1 parent 22cc463 commit 90fc166
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 8 deletions.
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,22 @@ Otherwise, copy the clangd args from the [.vscode/settings.json](.vscode/setting
### Done

* Build Huffman code tables from given symbol frequencies.
* Huffman decoding with C++ std lib.

### TODO

* Implement Huffman decompression with C++ std lib.
* Set up bazel build of OpenSYCL with OpenMP.
* Port Huffman decompression to SYCL.
* Implement LZ77 with C++ std lib.
* Port LZ77 to SYCL.
* Implement Deflate decompression.
* Try building it for a GPU.
* Get SYCL building with Bazel. OpenSYCL already builds for CPU only [here](https://github.com/garymm/xpu).
It would be nicer to use [Intel's LLVM](https://github.com/intel/llvm), which supports many GPUs.
* (maybe?) Implement LZ77 with C++ std lib.
* Implement Deflate decompression with C++ std lib.
* Port Deflate to SYCL.
* Benchmark it on CPU.
* Build system work to get it to run on GPU.
* Benchmark it on GPU.

## References

* [DEFLATE Compressed Data Format Specification version 1.3](https://tools.ietf.org/html/rfc1951)
* [pyflate](https://github.com/garymm/pyflate)
* [An Explanation of the Deflate Algorithm](https://zlib.net/feldspar.html)
* [LZ77 Specification](https://www.cs.duke.edu/courses/spring03/cps296.5/papers/ziv_lempel_1977_universal_algorithm.pdf)
1 change: 1 addition & 0 deletions huffman/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ cc_library(
"src/bit.hpp",
"src/bit_span.hpp",
"src/code.hpp",
"src/decode.hpp",
"src/detail/base_view.hpp",
"src/detail/iterator_interface.hpp",
"src/detail/static_vector.hpp",
Expand Down
1 change: 1 addition & 0 deletions huffman/huffman.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
#include "huffman/src/bit.hpp"
#include "huffman/src/bit_span.hpp"
#include "huffman/src/code.hpp"
#include "huffman/src/decode.hpp"
#include "huffman/src/encoding.hpp"
#include "huffman/src/table.hpp"
12 changes: 11 additions & 1 deletion huffman/src/bit_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <cstdint>
#include <iterator>
#include <limits>
#include <ranges>

namespace gpu_deflate::huffman {
/// A non-owning span of bits. Allows for iteration over the individual bits.
Expand Down Expand Up @@ -76,10 +77,19 @@ class bit_span
///
/// @param data a pointer to the first byte of the data.
/// @param bit_size the number of bits in the data.
///
/// @note Only the pointer and the bit count are stored; the bytes are not
/// copied. Because the span is non-owning, the caller must keep the
/// underlying bytes alive for as long as the span is used.
constexpr bit_span(const std::byte* data, size_t bit_size)
constexpr bit_span(const std::byte* data, std::size_t bit_size)
: data_(data), bit_size_(bit_size)
{}

/// Constructs a bit_span that views every bit of a contiguous range of bytes.
///
/// The resulting span covers `std::ranges::size(r) * CHAR_BIT` bits starting
/// at `std::ranges::data(r)`.
///
/// This overload participates in overload resolution only for borrowed,
/// sized, contiguous ranges whose value type is `std::byte`. Ranges of any
/// other element type are rejected by the constraints instead of producing a
/// hard error inside the delegated pointer/size constructor.
///
/// @param r the range of bytes to view. The span does not own the bytes.
/// @tparam R the type of the byte range.
template <std::ranges::contiguous_range R>
  requires std::ranges::borrowed_range<R> && std::ranges::sized_range<R> &&
           std::same_as<std::ranges::range_value_t<R>, std::byte>
// TODO: remove cppcoreguidelines-pro-type-member-init once
// https://reviews.llvm.org/D157367 in our toolchain.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,bugprone-forwarding-reference-overload)
constexpr bit_span(R&& r)
    : bit_span(
          std::ranges::data(r),
          // One element is exactly one byte, so the bit count is size * CHAR_BIT.
          static_cast<std::size_t>(std::ranges::size(r)) * CHAR_BIT)
{}

[[nodiscard]]
constexpr auto begin() const -> iterator
{
Expand Down
15 changes: 15 additions & 0 deletions huffman/src/code.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,21 @@ class code
return std::move(c);
}

/// Appends `b` to `c` as its new least-significant bit.
///
/// The bits already held by `c` are shifted left by one position and the
/// bit size of `c` grows by one. Returns `c` so insertions can be chained.
///
friend constexpr auto operator<<(code& c, bit b) -> code&
{
  const auto appended = static_cast<std::size_t>(static_cast<bool>(b));
  c.value_ = (c.value_ << 1U) | appended;
  ++c.bitsize_;
  return c;
}
/// Rvalue overload: appends `b` to the temporary `c` through the lvalue
/// overload and hands the same object back as an rvalue.
///
friend constexpr auto operator<<(code&& c, bit b) -> code&&
{
  // `c` is a named reference here, so `c << b` selects the `code&` overload.
  return std::move(c << b);
}

/// Inserts a textual representation of `c` into `os`
///
friend auto operator<<(std::ostream& os, const code& c) -> std::ostream&
Expand Down
48 changes: 48 additions & 0 deletions huffman/src/decode.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#pragma once
#include "huffman/src/bit_span.hpp"
#include "huffman/src/code.hpp"
#include "huffman/src/table.hpp"

#include <iterator>

namespace gpu_deflate::huffman {
/// Decodes a Huffman-coded bit stream into symbols using a code table.
///
/// Bits from \p bits are appended one at a time to a candidate code, and the
/// candidate is looked up in \p code_table after every bit. On a hit the
/// matching symbol is written to \p output and the candidate is reset.
///
/// If a lookup fails and reports the end of \p code_table, decoding stops
/// immediately and the remaining \p bits are not read. Bits left over after
/// the last complete code are dropped.
///
/// @param code_table The code table to use for decoding.
/// @param bits The bit stream to decode.
/// @param output The output iterator that receives the decoded symbols.
///
/// @returns The output iterator positioned after the last written symbol.
/// @tparam Symbol The type of the symbols in the code table.
/// @tparam Extent The extent of the code table.
/// @tparam O The type of the output iterator.
template <
  std::regular Symbol,
  std::size_t Extent = std::dynamic_extent,
  std::output_iterator<Symbol> O>
constexpr auto
decode(const table<Symbol, Extent>& code_table, bit_span bits, O output) -> O
{
  code pending{};
  auto search_from = code_table.begin();

  for (auto next_bit : bits) {
    pending << next_bit;
    auto lookup = code_table.find(pending, search_from);

    if (!lookup) {
      // A failed lookup that reports end() means no match is possible: stop.
      if (lookup.error() == code_table.end()) {
        break;
      }
      // Otherwise resume the next lookup from the position the table reported.
      search_from = lookup.error();
      continue;
    }

    // Complete code found: emit its symbol and start over with an empty code.
    *output = (*lookup)->symbol;
    ++output;
    search_from = code_table.begin();
    pending = code{};
  }

  return output;
}
} // namespace gpu_deflate::huffman
10 changes: 10 additions & 0 deletions huffman/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ cc_test(
],
)

# Unit test target for Huffman decoding, built from decode_test.cpp.
# Depends on the //huffman library under test and the boost_ut test framework.
cc_test(
name = "decode_test",
timeout = "short",
srcs = ["decode_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_binary(
name = "bench",
srcs = ["bench.cpp"],
Expand Down
58 changes: 58 additions & 0 deletions huffman/test/decode_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "huffman/huffman.hpp"

#include <boost/ut.hpp>

#include <array>
#include <climits>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// Test driver: registers one boost.ut test case, "basic", which decodes a
// fixed byte sequence with a fixed code table and verifies the result at
// compile time via static_assert.
auto main() -> int
{
using ::boost::ut::expect;
using ::boost::ut::test;

namespace huffman = ::gpu_deflate::huffman;
// Presumably provides the `_c` code literals used in the table below.
using namespace huffman::literals;

test("basic") = [] {
// encoded data from dahuffman readme.rst, but in hex.
constexpr std::array<std::byte, 6> encoded_bytes = {
std::byte{0x86},
std::byte{0x7c},
std::byte{0x25},
std::byte{0x13},
std::byte{0x69},
std::byte{0x40}};

// End-of-transmission marker (character value 4); it is the last expected symbol.
constexpr char eot = {'\4'};
// Code table mapping each code to the symbol it decodes to.
static constexpr auto code_table = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{00000_c, eot},
{00001_c, 'x'},
{0001_c, 'q'},
{001_c, 'n'},
{01_c, 'i'},
{1_c, 'e'}}
}; // clang-format on

// Symbols that decoding encoded_bytes with code_table must produce, in order.
constexpr std::array expected = {
'e', 'x', 'e', 'n', 'e', 'e', 'e', 'e', 'x', 'n',
'i', 'q', 'n', 'e', 'i', 'e', 'i', 'n', 'i', eot,
};
// Decode inside an immediately invoked lambda so the whole decode is
// evaluated as part of a constexpr initializer.
constexpr auto output_buf = [&] {
std::array<char, expected.size()> output_buf{};
// encoded_bytes is passed where decode takes a bit_span, so it is
// converted to a span over all of its bits.
auto result = decode(code_table, encoded_bytes, output_buf.begin());
// result should equal output_buf.end(), i.e. exactly expected.size()
// symbols were written. Reaching the throw during constant evaluation
// makes the initializer non-constant, which is a compile error.
if (output_buf.end() != result) {
throw std::runtime_error("assertion failed");
}
return output_buf;
}();

// Compile-time check that the decoded symbols match the expectation.
static_assert(output_buf == expected);
};
}

0 comments on commit 90fc166

Please sign in to comment.