diff --git a/src/CircularBuffer.zig b/src/CircularBuffer.zig index 1e2f99d..f25a231 100644 --- a/src/CircularBuffer.zig +++ b/src/CircularBuffer.zig @@ -1,18 +1,18 @@ -/// 64K buffer of uncompressed data created in inflate (decompression). Has enough -/// history to support writing match; copying length of bytes -/// from the position distance backward from current. -/// -/// Reads can return less than available bytes if they are spread across -/// different circles. So reads should repeat until get required number of bytes -/// or until returned slice is zero length. -/// -/// Note on deflate limits: -/// * non-compressible block is limited to 65,535 bytes. -/// * backward pointer is limited in distance to 32K bytes and in length to 258 bytes. -/// -/// Whole non-compressed block can be written without overlap. We always have -/// history of up to 64K, more then 32K needed. -/// +//! 64K buffer of uncompressed data created in inflate (decompression). Has enough +//! history to support writing match; copying length of bytes +//! from the position distance backward from current. +//! +//! Reads can return less than available bytes if they are spread across +//! different circles. So reads should repeat until get required number of bytes +//! or until returned slice is zero length. +//! +//! Note on deflate limits: +//! * non-compressible block is limited to 65,535 bytes. +//! * backward pointer is limited in distance to 32K bytes and in length to 258 bytes. +//! +//! Whole non-compressed block can be written without overlap. We always have +//! history of up to 64K, more than 32K needed. +//! const std = @import("std"); const assert = std.debug.assert; const testing = std.testing; @@ -32,15 +32,15 @@ fn writeAll(self: *Self, buf: []const u8) void { for (buf) |c| self.write(c); } -// Write literal. +/// Write literal. 
pub fn write(self: *Self, b: u8) void { assert(self.wp - self.rp < mask); self.buffer[self.wp & mask] = b; self.wp += 1; } -// Write match (back-reference to the same data slice) starting at `distance` -// back from current write position, and `length` of bytes. +/// Write match (back-reference to the same data slice) starting at `distance` +/// back from current write position, and `length` of bytes. pub fn writeMatch(self: *Self, length: u16, distance: u16) !void { if (self.wp < distance or length < consts.base_length or length > consts.max_length or @@ -74,8 +74,8 @@ pub fn writeMatch(self: *Self, length: u16, distance: u16) !void { } } -// Returns writable part of the internal buffer of size `n` at most. Advances -// write pointer, assumes that returned buffer will be filled with data. +/// Returns writable part of the internal buffer of size `n` at most. Advances +/// write pointer, assumes that returned buffer will be filled with data. pub fn getWritable(self: *Self, n: usize) []u8 { const wp = self.wp & mask; const len = @min(n, buffer_len - wp); @@ -83,14 +83,14 @@ pub fn getWritable(self: *Self, n: usize) []u8 { return self.buffer[wp .. wp + len]; } -// Read available data. Can return part of the available data if it is -// spread across two circles. So read until this returns zero length. +/// Read available data. Can return part of the available data if it is +/// spread across two circles. So read until this returns zero length. pub fn read(self: *Self) []const u8 { return self.readAtMost(buffer_len); } -// Read part of available data. Can return less than max even if there are -// more than max decoded data. +/// Read part of available data. Can return less than max even if there are +/// more than max decoded data. 
pub fn readAtMost(self: *Self, limit: usize) []const u8 { const rb = self.readBlock(if (limit == 0) buffer_len else limit); defer self.rp += rb.len; @@ -103,7 +103,7 @@ const ReadBlock = struct { len: usize, }; -// Returns position of continous read block data. +/// Returns position of continuous read block data. fn readBlock(self: *Self, max: usize) ReadBlock { const r = self.rp & mask; const w = self.wp & mask; @@ -118,13 +118,13 @@ fn readBlock(self: *Self, max: usize) ReadBlock { }; } -// Number of free bytes for write. +/// Number of free bytes for write. pub fn free(self: *Self) usize { return buffer_len - (self.wp - self.rp); } -// Full if largest match can't fit. 258 is largest match length. That much bytes -// can be produced in single decode step. +/// Full if largest match can't fit. 258 is largest match length. That many +/// bytes can be produced in single decode step. pub fn full(self: *Self) bool { return self.free() < 258 + 1; } diff --git a/src/SlidingWindow.zig b/src/SlidingWindow.zig index ceab98c..2e935fa 100644 --- a/src/SlidingWindow.zig +++ b/src/SlidingWindow.zig @@ -1,6 +1,6 @@ -/// Used in deflate (compression), holds uncompressed data form which Tokens are -/// produces. In combination with Lookup it is used to find matches in history data. -/// +//! Used in deflate (compression), holds uncompressed data from which Tokens are +//! produced. In combination with Lookup it is used to find matches in history data. +//! const std = @import("std"); const consts = @import("consts.zig"); @@ -20,7 +20,7 @@ wp: usize = 0, // write position rp: usize = 0, // read position fp: isize = 0, // last flush position, tokens are build from fp..rp -// Returns number of bytes written, or 0 if buffer is full and need to slide. +/// Returns number of bytes written, or 0 if buffer is full and needs to slide. 
pub fn write(self: *Self, buf: []const u8) usize { if (self.rp >= max_rp) return 0; // need to slide @@ -30,9 +30,9 @@ pub fn write(self: *Self, buf: []const u8) usize { return n; } -// Slide buffer for hist_len. -// Drops old history, preserves between hist_len and hist_len - min_lookahead. -// Returns number of bytes removed. +/// Slide buffer for hist_len. +/// Drops old history, preserves between hist_len and hist_len - min_lookahead. +/// Returns number of bytes removed. pub fn slide(self: *Self) u16 { assert(self.rp >= max_rp and self.wp >= self.rp); const n = self.wp - hist_len; @@ -43,41 +43,41 @@ pub fn slide(self: *Self) u16 { return @intCast(n); } -// Data from the current position (read position). Those part of the buffer is -// not converted to tokens yet. +/// Data from the current position (read position). That part of the buffer is +/// not converted to tokens yet. fn lookahead(self: *Self) []const u8 { assert(self.wp >= self.rp); return self.buffer[self.rp..self.wp]; } -// Returns part of the lookahead buffer. If should_flush is set no lookahead is -// preserved otherwise preserves enough data for the longest match. Returns -// null if there is not enough data. +/// Returns part of the lookahead buffer. If should_flush is set no lookahead is +/// preserved; otherwise preserves enough data for the longest match. Returns +/// null if there is not enough data. pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 { const min: usize = if (should_flush) 0 else min_lookahead; const lh = self.lookahead(); return if (lh.len > min) lh else null; } -// Advances read position, shrinks lookahead. +/// Advances read position, shrinks lookahead. pub fn advance(self: *Self, n: u16) void { assert(self.wp >= self.rp + n); self.rp += n; } -// Returns writable part of the buffer, where new uncompressed data can be -// written. +/// Returns writable part of the buffer, where new uncompressed data can be +/// written. 
pub fn writable(self: *Self) []u8 { return self.buffer[self.wp..]; } -// Notification of what part of writable buffer is filled with data. +/// Notification of what part of writable buffer is filled with data. pub fn written(self: *Self, n: usize) void { self.wp += n; } -// Finds match length between previous and current position. -// Used in hot path! +/// Finds match length between previous and current position. +/// Used in hot path! pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 { const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length); // lookahead buffers from previous and current positions @@ -103,19 +103,19 @@ pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 { return if (i >= consts.match.min_length) @intCast(i) else 0; } -// Current position of non-compressed data. Data before rp are already converted -// to tokens. +/// Current position of non-compressed data. Data before rp are already converted +/// to tokens. pub fn pos(self: *Self) u16 { return @intCast(self.rp); } -// Notification that token list is cleared. +/// Notification that token list is cleared. pub fn flush(self: *Self) void { self.fp = @intCast(self.rp); } -// Part of the buffer since last flush or null if there was slide in between (so -// fp becomes negative). +/// Part of the buffer since last flush or null if there was slide in between (so +/// fp becomes negative). pub fn tokensBuffer(self: *Self) ?[]const u8 { assert(self.fp <= self.rp); if (self.fp < 0) return null; diff --git a/src/Token.zig b/src/Token.zig index fd9dd1d..7e067da 100644 --- a/src/Token.zig +++ b/src/Token.zig @@ -1,7 +1,7 @@ -/// Token cat be literal: single byte of data or match; reference to the slice of -/// data in the same stream represented with . Where length -/// can be 3 - 258 bytes, and distance 1 - 32768 bytes. -/// +//! Token can be literal: single byte of data or match; reference to the slice of +//! 
data in the same stream represented with a (length, distance) pair. Where length +//! can be 3 - 258 bytes, and distance 1 - 32768 bytes. +//! const std = @import("std"); const assert = std.debug.assert; const print = std.debug.print; diff --git a/src/bit_reader.zig b/src/bit_reader.zig index a31813f..40e9b76 100644 --- a/src/bit_reader.zig +++ b/src/bit_reader.zig @@ -34,15 +34,15 @@ pub fn BitReader(comptime ReaderType: type) type { return self; } - // Try to have `nice` bits are available in buffer. Reads from - // forward reader if there is no `nice` bits in buffer. Returns error - // if end of forward stream is reached and internal buffer is empty. - // It will not error if less than `nice` bits are in buffer, only when - // all bits are exhausted. During inflate we usually know what is the - // maximum bits for the next step but usually that step will need less - // bits to decode. So `nice` is not hard limit, it will just try to have - // that number of bits available. If end of forward stream is reached - // it may be some extra zero bits in buffer. + /// Try to have `nice` bits available in buffer. Reads from + /// forward reader if there are fewer than `nice` bits in buffer. Returns error + /// if end of forward stream is reached and internal buffer is empty. + /// It will not error if less than `nice` bits are in buffer, only when + /// all bits are exhausted. During inflate we usually know what is the + /// maximum bits for the next step but usually that step will need less + /// bits to decode. So `nice` is not hard limit, it will just try to have + /// that number of bits available. If end of forward stream is reached + /// there may be some extra zero bits in buffer. pub inline fn fill(self: *Self, nice: u6) !void { if (self.nbits >= nice) { return; // We have enought bits @@ -67,7 +67,7 @@ pub fn BitReader(comptime ReaderType: type) type { return error.EndOfStream; } - // Read exactly buf.len bytes into buf. + /// Read exactly buf.len bytes into buf. 
pub fn readAll(self: *Self, buf: []u8) !void { assert(self.alignBits() == 0); // internal bits must be at byte boundary @@ -87,17 +87,17 @@ pub fn BitReader(comptime ReaderType: type) type { pub const reverse: u3 = 0b100; // bit reverse readed bits }; - // Alias for readF(U, 0). + /// Alias for readF(U, 0). pub fn read(self: *Self, comptime U: type) !U { return self.readF(U, 0); } - // Alias for readF with flag.peak set. + /// Alias for readF with flag.peek set. pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U { return self.readF(U, how | flag.peek); } - // Read with flags provided. + /// Read with flags provided. pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U { const n: u6 = @bitSizeOf(U); switch (how) { @@ -140,8 +140,8 @@ pub fn BitReader(comptime ReaderType: type) type { } } - // Read n number of bits. - // Only buffered flag can be used in how. + /// Read n number of bits. + /// Only buffered flag can be used in how. pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 { switch (how) { 0 => { @@ -156,14 +156,14 @@ pub fn BitReader(comptime ReaderType: type) type { return u; } - // Advance buffer for n bits. + /// Advance buffer for n bits. pub fn shift(self: *Self, n: u6) !void { if (n > self.nbits) return error.EndOfStream; self.bits >>= n; self.nbits -= n; } - // Skip n bytes. + /// Skip n bytes. pub fn skipBytes(self: *Self, n: u16) !void { for (0..n) |_| { try self.fill(8); @@ -176,32 +176,32 @@ pub fn BitReader(comptime ReaderType: type) type { return @intCast(self.nbits & 0x7); } - // Align stream to the byte boundary. + /// Align stream to the byte boundary. pub fn alignToByte(self: *Self) void { const ab = self.alignBits(); if (ab > 0) self.shift(ab) catch unreachable; } - // Skip zero terminated string. + /// Skip zero terminated string. pub fn skipStringZ(self: *Self) !void { while (true) { if (try self.readF(u8, 0) == 0) break; } } - // Read deflate fixed fixed code. 
- // Reads first 7 bits, and then mybe 1 or 2 more to get full 7,8 or 9 bit code. - // ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12 - // Lit Value Bits Codes - // --------- ---- ----- - // 0 - 143 8 00110000 through - // 10111111 - // 144 - 255 9 110010000 through - // 111111111 - // 256 - 279 7 0000000 through - // 0010111 - // 280 - 287 8 11000000 through - // 11000111 + /// Read deflate fixed code. + /// Reads first 7 bits, and then maybe 1 or 2 more to get full 7,8 or 9 bit code. + /// ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12 + /// Lit Value Bits Codes + /// --------- ---- ----- + /// 0 - 143 8 00110000 through + /// 10111111 + /// 144 - 255 9 110010000 through + /// 111111111 + /// 256 - 279 7 0000000 through + /// 0010111 + /// 280 - 287 8 11000000 through + /// 11000111 pub fn readFixedCode(self: *Self) !u16 { try self.fill(7 + 2); const code7 = try self.readF(u7, flag.buffered | flag.reverse); diff --git a/src/container.zig b/src/container.zig index 9e6f742..477a7c6 100644 --- a/src/container.zig +++ b/src/container.zig @@ -1,19 +1,20 @@ +//! Container of the deflate bit stream body. Container adds header before +//! deflate bit stream and footer after. It can be gzip, zlib or raw (no header, +//! no footer, raw bit stream). +//! +//! Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes +//! Adler-32 checksum. +//! +//! Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes +//! crc32 checksum and 4 bytes of uncompressed data length. +//! +//! +//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4 +//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5 +//! + const std = @import("std"); -/// Container of the deflate bit stream body. Container adds header before -/// deflate bit stream and footer after. It can bi gzip, zlib or raw (no header, -/// no footer, raw bit stream). -/// -/// Zlib format is defined in rfc 1950. 
Header has 2 bytes and footer 4 bytes -/// addler 32 checksum. -/// -/// Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes -/// crc32 checksum and 4 bytes of uncompressed data length. -/// -/// -/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4 -/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5 -/// pub const Container = enum { raw, // no header or footer gzip, // gzip header and footer