Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

huffman decode #69

Merged
merged 1 commit into from
Sep 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,22 @@ Otherwise, copy the clangd args from the [.vscode/settings.json](.vscode/setting
### Done

* Build Huffman code tables from given symbol frequencies.
* Huffman decoding with C++ std lib.

### TODO

* Implement Huffman decompression with C++ std lib.
* Set up bazel build of OpenSYCL with OpenMP.
* Port Huffman decompression to SYCL.
* Implement LZ77 with C++ std lib.
* Port LZ77 to SYCL.
* Implement Deflate decompression.
* Try building it for a GPU.
* Get SYCL building with bazel. Already have OpenSYCL building for CPU only [here](https://github.com/garymm/xpu).
Would be nicer to use [Intel's LLVM](https://github.com/intel/llvm), which supports lots of GPUs.
* (maybe?) Implement LZ77 with C++ std lib.
* Implement Deflate decompression with C++ std lib.
* Port Deflate to SYCL.
* Benchmark it on CPU.
* Build system work to get it to run on GPU.
* Benchmark it on GPU.

## References

* [DEFLATE Compressed Data Format Specification version 1.3](https://tools.ietf.org/html/rfc1951)
* [pyflate](https://github.com/garymm/pyflate)
* [An Explanation of the Deflate Algorithm](https://zlib.net/feldspar.html)
* [LZ77 Specification](https://www.cs.duke.edu/courses/spring03/cps296.5/papers/ziv_lempel_1977_universal_algorithm.pdf)
1 change: 1 addition & 0 deletions huffman/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ cc_library(
"src/bit.hpp",
"src/bit_span.hpp",
"src/code.hpp",
"src/decode.hpp",
"src/detail/base_view.hpp",
"src/detail/iterator_interface.hpp",
"src/detail/static_vector.hpp",
Expand Down
1 change: 1 addition & 0 deletions huffman/huffman.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
#include "huffman/src/bit.hpp"
#include "huffman/src/bit_span.hpp"
#include "huffman/src/code.hpp"
#include "huffman/src/decode.hpp"
#include "huffman/src/encoding.hpp"
#include "huffman/src/table.hpp"
12 changes: 11 additions & 1 deletion huffman/src/bit_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <cstdint>
#include <iterator>
#include <limits>
#include <ranges>

namespace gpu_deflate::huffman {
/// A non-owning span of bits. Allows for iteration over the individual bits.
Expand Down Expand Up @@ -76,10 +77,19 @@ class bit_span
///
/// @param data a pointer to the first byte of the data.
/// @param bit_size the number of bits in the data.
constexpr bit_span(const std::byte* data, size_t bit_size)
constexpr bit_span(const std::byte* data, std::size_t bit_size)
: data_(data), bit_size_(bit_size)
{}

/// Constructs a bit_span covering every bit of a contiguous range.
///
/// Delegates to the (data, bit_size) constructor, passing
/// `std::ranges::data(r)` as the data pointer and
/// `std::ranges::size(r) * CHAR_BIT` as the bit count, i.e. each element of
/// \p r is counted as exactly one byte.
///
/// bit_span is non-owning, so \p R must model `std::ranges::borrowed_range`:
/// the span keeps pointing into the storage of \p r after construction.
///
/// The constructor is deliberately not `explicit`, so a suitable range
/// converts implicitly to a bit_span at call sites.
///
/// @tparam R a contiguous, borrowed range type.
/// @param r the range whose elements supply the bits.
template <std::ranges::contiguous_range R>
requires std::ranges::borrowed_range<R>
// TODO: remove cppcoreguidelines-pro-type-member-init once
// https://reviews.llvm.org/D157367 in our toolchain.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,bugprone-forwarding-reference-overload)
constexpr bit_span(R&& r)
: bit_span(std::ranges::data(r), std::ranges::size(r) * CHAR_BIT)
{}

[[nodiscard]]
constexpr auto begin() const -> iterator
{
Expand Down
15 changes: 15 additions & 0 deletions huffman/src/code.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,21 @@ class code
return std::move(c);
}

/// Appends `b` to the right of `c`.
///
/// The bits already stored in `c` move one position towards the
/// most-significant end, `b` becomes the new least-significant bit and the
/// bit size of `c` grows by one.
///
/// @param c the code to extend in place.
/// @param b the bit to append.
/// @returns a reference to `c`, so that appends can be chained.
friend constexpr auto operator<<(code& c, bit b) -> code&
{
  // 1 for a set bit, 0 for a cleared bit.
  const auto appended = static_cast<std::size_t>(static_cast<bool>(b));

  c.value_ <<= 1U;
  c.value_ |= appended;
  ++c.bitsize_;
  return c;
}
/// Appends `b` to the right of the temporary `c`.
///
/// Forwards to the lvalue overload (a named rvalue reference is an lvalue)
/// and hands the same object back as an rvalue, so a temporary code can be
/// extended within a single expression.
///
/// @param c the temporary code to extend.
/// @param b the bit to append.
/// @returns `c`, as an rvalue reference.
friend constexpr auto operator<<(code&& c, bit b) -> code&&
{
  return std::move(c << b);
}

/// Inserts a textual representation of `c` into `os`
///
friend auto operator<<(std::ostream& os, const code& c) -> std::ostream&
Expand Down
48 changes: 48 additions & 0 deletions huffman/src/decode.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#pragma once
#include "huffman/src/bit_span.hpp"
#include "huffman/src/code.hpp"
#include "huffman/src/table.hpp"

#include <iterator>

namespace gpu_deflate::huffman {
/// Decodes a stream of Huffman-coded bits into symbols.
///
/// Bits are consumed one at a time and accumulated into a candidate code.
/// Whenever the candidate matches an entry of \p code_table, that entry's
/// symbol is written to \p output and accumulation starts over with an empty
/// code.
///
/// Decoding stops early, leaving the remaining \p bits unread, as soon as the
/// table lookup reports `code_table.end()` for the candidate code. Bits left
/// over at the end of \p bits that do not complete a code produce no output.
///
/// @param code_table The code table to use for decoding.
/// @param bits The bit stream to decode.
/// @param output The output iterator that receives the decoded symbols. The
///   caller must make sure it can accept every decoded symbol.
///
/// @returns The output iterator, advanced past the last symbol written.
/// @tparam Symbol The type of the symbols in the code table.
/// @tparam Extent The extent of the code table.
/// @tparam O The type of the output iterator.
template <
    std::regular Symbol,
    std::size_t Extent = std::dynamic_extent,
    std::output_iterator<Symbol> O>
constexpr auto
decode(const table<Symbol, Extent>& code_table, bit_span bits, O output) -> O
{
  // Code bits gathered since the last emitted symbol.
  code pending{};
  // Table position handed to `find` as the starting point of the next lookup.
  auto search_from = code_table.begin();

  for (auto next_bit : bits) {
    pending << next_bit;
    auto lookup = code_table.find(pending, search_from);

    if (!lookup) {
      // NOTE(review): on a miss `find` appears to yield either `end()`
      // (nothing in the table can match) or a position to resume from on the
      // next bit -- inferred from this usage; confirm against table::find.
      if (lookup.error() == code_table.end()) {
        return output;
      }
      search_from = lookup.error();
      continue;
    }

    // Complete code: emit its symbol, then start a fresh code from the
    // beginning of the table.
    *output = (*lookup)->symbol;
    ++output;
    pending = code{};
    search_from = code_table.begin();
  }

  return output;
}
} // namespace gpu_deflate::huffman
10 changes: 10 additions & 0 deletions huffman/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ cc_test(
],
)

# Test target for the Huffman decoder: builds decode_test.cpp against the
# //huffman library and the boost_ut test framework.
cc_test(
name = "decode_test",
timeout = "short",
srcs = ["decode_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_binary(
name = "bench",
srcs = ["bench.cpp"],
Expand Down
58 changes: 58 additions & 0 deletions huffman/test/decode_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "huffman/huffman.hpp"

#include <boost/ut.hpp>

#include <array>
#include <climits>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// Test driver for huffman::decode.
//
// The single "basic" case decodes a fixed byte sequence during constant
// evaluation and checks the result with a static_assert, so a decoding
// regression shows up as a compile error rather than a runtime failure.
auto main() -> int
{
using ::boost::ut::expect;
using ::boost::ut::test;

namespace huffman = ::gpu_deflate::huffman;
// Brings the `_c` code literal used in the table below into scope
// (presumably declared in huffman::literals -- confirm against code.hpp).
using namespace huffman::literals;

test("basic") = [] {
// encoded data from dahuffman readme.rst, but in hex.
constexpr std::array<std::byte, 6> encoded_bytes = {
std::byte{0x86},
std::byte{0x7c},
std::byte{0x25},
std::byte{0x13},
std::byte{0x69},
std::byte{0x40}};

// ASCII "end of transmission" control character (value 4), used here as
// the final symbol of the message.
constexpr char eot = {'\4'};
// Code table pairing each code with the symbol it decodes to.
static constexpr auto code_table = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{00000_c, eot},
{00001_c, 'x'},
{0001_c, 'q'},
{001_c, 'n'},
{01_c, 'i'},
{1_c, 'e'}}
}; // clang-format on

// The 20 symbols the encoded bytes are expected to decode to. Assuming each
// `_c` literal's bit length equals its digit count, their codes total 47
// bits, so the last of the 48 input bits is padding that completes no code.
constexpr std::array expected = {
'e', 'x', 'e', 'n', 'e', 'e', 'e', 'e', 'x', 'n',
'i', 'q', 'n', 'e', 'i', 'e', 'i', 'n', 'i', eot,
};
// Immediately-invoked lambda so that decode runs while initializing a
// constexpr variable. `encoded_bytes` is passed where decode takes a
// bit_span, relying on an implicit conversion from the array.
constexpr auto output_buf = [&] {
std::array<char, expected.size()> output_buf{};
auto result = decode(code_table, encoded_bytes, output_buf.begin());
// result should equal output_buf.end(), i.e. exactly expected.size()
// symbols were written. Reaching the throw during constant evaluation
// makes the initializer non-constant, which fails the build.
if (output_buf.end() != result) {
throw std::runtime_error("assertion failed");
}
return output_buf;
}();

// Compile-time comparison of the decoded symbols against the expectation.
static_assert(output_buf == expected);
};
}