Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support decompressing fixed huffman blocks #124

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 49 additions & 23 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,39 @@ constexpr auto distance_infos = std::array<LengthInfo, 30>{
{9, 1025}, {9, 1537}, {10, 2049}, {10, 3073}, {11, 4097},
{11, 6145}, {12, 8193}, {12, 12289}, {13, 16385}, {13, 24577}}};

enum class ParseLitOrLenStatus : std::uint8_t
{
EndOfBlock,
Error,
};

auto parse_lit_or_len(
std::uint16_t lit_or_len, huffman::bit_span& src_bits) -> std::
expected<std::variant<std::byte, std::uint16_t>, ParseLitOrLenStatus>
{
if (lit_or_len < detail::lit_or_len_end_of_block) {
return static_cast<std::byte>(lit_or_len);
}
if (lit_or_len == detail::lit_or_len_end_of_block) {
return std::unexpected{ParseLitOrLenStatus::EndOfBlock};
}
if (lit_or_len > detail::lit_or_len_max) {
return std::unexpected{ParseLitOrLenStatus::Error};
}
std::uint16_t len{};
if (lit_or_len == detail::lit_or_len_max) {
len = detail::lit_or_len_max_decoded;
} else {
Comment on lines +127 to +130
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like you can just return instead of construction + assignment of len?

if (lit_or_len == detail::lit_or_len_max) {
    return detail::lit_or_len_max_decoded;
}

const auto len_idx =
static_cast<size_t>(lit_or_len - detail::lit_or_len_end_of_block - 1);
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
const auto& len_info = detail::length_infos[len_idx];
const auto extra_len = src_bits.pop_n(len_info.extra_bits);
len = len_info.base + extra_len;
}
return len;
}

auto decompress_block_huffman(
huffman::bit_span& src_bits,
std::span<std::byte> dst,
Expand All @@ -81,36 +114,29 @@ auto decompress_block_huffman(
const huffman::table<std::uint16_t, fixed_dist_table_size>& dist_table)
-> DecompressStatus
{
std::uint16_t lit_or_len{};
while (true) {
oliverlee marked this conversation as resolved.
Show resolved Hide resolved
const auto lit_or_len_decoded = huffman::decode_one(len_table, src_bits);
if (not lit_or_len_decoded.encoded_size) {
return DecompressStatus::InvalidLitOrLen;
}
lit_or_len = lit_or_len_decoded.symbol;
src_bits.consume(lit_or_len_decoded.encoded_size);
if (lit_or_len < detail::lit_or_len_end_of_block) {
dst[static_cast<std::size_t>(dst_written++)] =
static_cast<std::byte>(lit_or_len);
continue;
}
if (lit_or_len == detail::lit_or_len_end_of_block) {
break;
}
if (lit_or_len > detail::lit_or_len_max) {
const auto maybe_lit_or_len =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks better to me.

I think this could still benefit from breaking out smaller functions - although if you think that's too much work, feel free to ignore.

  const auto done = 
    parse_lit_or_len(lit_or_len_decoded.symbol, src_bits)
      .and_then(decode_lit_or_len_into(dst))
      .or_else(convert_error_status)
      
   if (done) {
     return *done;
   }

   // done is empty, so the the loop continues

where convert_error_status: ParseLitOrLenStatus -> optional<DecompressStatus>

return {in_place, e == EndOfBlock ? Success : InvalidLitOrLen};

and decode_lit_or_len_into uses the poor version of pattern matching available in C++:

auto decode_lit_or_len_into(std::span<std::byte>& dst)
{
  return [&dst](variant<std::byte, std::uint16_t> lit_or_len) {
    return visit(
      overloaded({
        [&dst](byte lit) { return decode_into(lit, dst); },
        [&dst](uint16_t len) { return decode_into(lit, dst); }
      }),
      lit_or_len
    );
  };
}

// https://en.cppreference.com/w/cpp/utility/variant/visit
template <class... Ts>
struct overloaded : Ts... { using Ts::operator()...; };

and

auto decode_into(byte lit, span& dst)
  -> optional<DecompressStatus>
{
  // lines 133-137
}
auto decode_into(uint16_t len, span& dst)
  -> optional<DecompressStatus>
{
 // lines 139-161
}

although that's a bit of a simplification - you'll also need to capture dst_written.

The different cases are broken out into different functions, although there's some logic inversion with variant::visit.

parse_lit_or_len(lit_or_len_decoded.symbol, src_bits);
if (not maybe_lit_or_len) {
if (maybe_lit_or_len.error() == ParseLitOrLenStatus::EndOfBlock) {
return DecompressStatus::Success;
}
return DecompressStatus::InvalidLitOrLen;
}
std::uint16_t len{};
if (lit_or_len == detail::lit_or_len_max) {
len = detail::lit_or_len_max_decoded;
} else {
const auto len_idx =
static_cast<size_t>(lit_or_len - detail::lit_or_len_end_of_block - 1);
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
const auto& len_info = detail::length_infos[len_idx];
const auto extra_len = src_bits.pop_n(len_info.extra_bits);
len = len_info.base + extra_len;
const auto lit_or_len = maybe_lit_or_len.value();
if (std::holds_alternative<std::byte>(lit_or_len)) {
if (dst.size() - static_cast<std::size_t>(dst_written) < 1) {
return DecompressStatus::DstTooSmall;
}
dst[static_cast<size_t>(dst_written++)] = std::get<std::byte>(lit_or_len);
continue;
}
const auto len = std::get<std::uint16_t>(lit_or_len);
const auto dist_decoded = huffman::decode_one(dist_table, src_bits);
const auto dist_code = dist_decoded.symbol;
if (not dist_decoded.encoded_size) {
Expand All @@ -131,15 +157,15 @@ auto decompress_block_huffman(
return DecompressStatus::DstTooSmall;
}
starflate::detail::copy_from_before(
dst.begin() + dst_written, distance, len);
distance, dst.begin() + dst_written, len);
dst_written += len;
}
return DecompressStatus::Success;
}

/// Copy n bytes from distance bytes before dst to dst.
void copy_from_before(
std::span<std::byte>::iterator dst, std::uint16_t distance, std::uint16_t n)
std::uint16_t distance, std::span<std::byte>::iterator dst, std::uint16_t n)
{
std::ptrdiff_t n_signed{n};
const auto src = dst - distance;
Expand Down
2 changes: 1 addition & 1 deletion src/decompress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ constexpr auto lit_or_len_max_decoded = std::uint16_t{258};
///
/// @pre dst - distance is valid.
void copy_from_before(
std::span<std::byte>::iterator dst,
std::uint16_t distance,
std::span<std::byte>::iterator dst,
std::uint16_t n);
} // namespace detail

Expand Down
2 changes: 1 addition & 1 deletion src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ auto main(int, char* argv[]) -> int
test("copy_from_before") = [] {
auto src_and_dst = huffman::byte_array(1, 2, 0, 0, 0, 0);
const auto dst_span = std::span<std::byte>{src_and_dst}.subspan(2);
detail::copy_from_before(dst_span.begin(), 2, 3);
detail::copy_from_before(2, dst_span.begin(), 3);
expect(eq(src_and_dst, huffman::byte_array(1, 2, 1, 2, 1, 0)));
};
};