Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update Snappy 1.1.10 #137

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ see the [CompFuzz Results](https://github.com/nemequ/compfuzz/wiki/Results) page
- [quicklz 1.5.0](http://www.quicklz.com)
- [shrinker 0.1](https://code.google.com/p/data-shrinker) - WARNING: it can throw SEGFAULT compiled with gcc 4.9+ -O3
- [slz 1.2.0](http://www.libslz.org/) - only a compressor, uses zlib for decompression
- [snappy 2020-07-11 (4dd277f)](https://github.com/google/snappy)
- [snappy 1.1.10](https://github.com/google/snappy)
- [tornado 0.6a](http://freearc.org)
- [ucl 1.03](http://www.oberhumer.com/opensource/ucl/)
- [wflz 2015-09-16](https://github.com/ShaneWF/wflz) - WARNING: it can throw SEGFAULT compiled with gcc 4.9+ -O3
Expand Down
2 changes: 1 addition & 1 deletion _lzbench/lzbench.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] =
{ "slz_deflate","1.2.0", 1, 3, 2, 0, lzbench_slz_compress, lzbench_slz_decompress, NULL, NULL },
{ "slz_gzip", "1.2.0", 1, 3, 1, 0, lzbench_slz_compress, lzbench_slz_decompress, NULL, NULL },
{ "slz_zlib", "1.2.0", 1, 3, 0, 0, lzbench_slz_compress, lzbench_slz_decompress, NULL, NULL },
{ "snappy", "2020-07-11", 0, 0, 0, 0, lzbench_snappy_compress, lzbench_snappy_decompress, NULL, NULL },
{ "snappy", "1.1.10", 0, 0, 0, 0, lzbench_snappy_compress, lzbench_snappy_decompress, NULL, NULL },
{ "tornado", "0.6a", 1, 16, 0, 0, lzbench_tornado_compress, lzbench_tornado_decompress, NULL, NULL },
{ "ucl_nrv2b", "1.03", 1, 9, 0, 0, lzbench_ucl_nrv2b_compress, lzbench_ucl_nrv2b_decompress, NULL, NULL },
{ "ucl_nrv2d", "1.03", 1, 9, 0, 0, lzbench_ucl_nrv2d_compress, lzbench_ucl_nrv2d_decompress, NULL, NULL },
Expand Down
Empty file removed snappy/config.h
Empty file.
104 changes: 92 additions & 12 deletions snappy/snappy-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,84 @@

#include "snappy-stubs-internal.h"

#if SNAPPY_HAVE_SSSE3
// Please do not replace with <x86intrin.h> or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <emmintrin.h>
#include <tmmintrin.h>
#endif

#if SNAPPY_HAVE_NEON
#include <arm_neon.h>
#endif

#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1
#else
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0
#endif

namespace snappy {
namespace internal {

#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
#if SNAPPY_HAVE_SSSE3
using V128 = __m128i;
#elif SNAPPY_HAVE_NEON
using V128 = uint8x16_t;
#endif

// Loads 128 bits of integer data from `src`, which must be 16-byte aligned.
inline V128 V128_Load(const V128* src);

// Loads 128 bits of integer data from `src`; no alignment is required.
inline V128 V128_LoadU(const V128* src);

// Stores the 128 bits of `val` to `dst`; no alignment is required.
inline void V128_StoreU(V128* dst, V128 val);

// Shuffles the packed 8-bit integers of `input` using `shuffle_mask`.
// Each packed integer in the shuffle mask must be in [0,16).
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);

// Constructs a V128 whose 16 bytes are all set to `c`.
inline V128 V128_DupChar(char c);

#if SNAPPY_HAVE_SSSE3
// SSE implementation: aligned 128-bit load through `_mm_load_si128`.
inline V128 V128_Load(const V128* src) {
  const V128 loaded = _mm_load_si128(src);
  return loaded;
}

// SSE implementation: unaligned 128-bit load through `_mm_loadu_si128`.
inline V128 V128_LoadU(const V128* src) {
  const V128 loaded = _mm_loadu_si128(src);
  return loaded;
}

// SSE implementation: unaligned 128-bit store through `_mm_storeu_si128`.
inline void V128_StoreU(V128* dst, V128 val) {
  _mm_storeu_si128(dst, val);
}

// SSSE3 implementation: byte shuffle of `input` through `_mm_shuffle_epi8`,
// with `shuffle_mask` supplying the per-byte source indices.
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
  const V128 shuffled = _mm_shuffle_epi8(input, shuffle_mask);
  return shuffled;
}

// SSE implementation: broadcasts `c` into every byte through `_mm_set1_epi8`.
inline V128 V128_DupChar(char c) {
  const V128 splat = _mm_set1_epi8(c);
  return splat;
}

#elif SNAPPY_HAVE_NEON
// NEON implementation: views `src` as a byte pointer and loads 16 bytes with
// `vld1q_u8`.
inline V128 V128_Load(const V128* src) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(src);
  return vld1q_u8(bytes);
}

// NEON implementation: views `src` as a byte pointer and loads 16 bytes with
// `vld1q_u8` (the same intrinsic the aligned variant uses).
inline V128 V128_LoadU(const V128* src) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(src);
  return vld1q_u8(bytes);
}

// NEON implementation: views `dst` as a byte pointer and stores the 16 bytes
// of `val` with `vst1q_u8`.
inline void V128_StoreU(V128* dst, V128 val) {
  uint8_t* out = reinterpret_cast<uint8_t*>(dst);
  vst1q_u8(out, val);
}

// NEON implementation: byte shuffle of `input` through `vqtbl1q_u8`, with
// `shuffle_mask` supplying the per-byte source indices.
// Every mask lane must be in [0,16); this is verified by an assert.
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
  // Mask lanes are unsigned, so a lower-bound check against 0 would be
  // vacuously true; only the upper bound can actually be violated.
  assert(vmaxvq_u8(shuffle_mask) <= 15);
  return vqtbl1q_u8(input, shuffle_mask);
}

// NEON implementation: broadcasts `c` into every byte lane through
// `vdupq_n_u8`.
inline V128 V128_DupChar(char c) {
  const V128 splat = vdupq_n_u8(c);
  return splat;
}
#endif
#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE

// Working memory performs a single allocation to hold all scratch space
// required for compression.
class WorkingMemory {
Expand Down Expand Up @@ -95,8 +170,9 @@ char* CompressFragment(const char* input,
// loading from s2 + n.
//
// Separate implementation for 64-bit, little-endian cpus.
#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
(defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
#if !SNAPPY_IS_BIG_ENDIAN && \
(defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
defined(ARCH_ARM))
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
const char* s2,
const char* s2_limit,
Expand Down Expand Up @@ -154,8 +230,9 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
*data = UNALIGNED_LOAD64(s2 + matched_bytes);
a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
// Ideally this would just be
//
Expand All @@ -166,13 +243,13 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
// use a conditional move (it's tuned to cut data dependencies). In this
// case there is a longer parallel chain anyway AND this will be fairly
// unpredictable.
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
: "r"(a3), "r"(xorval));
*data = a2 >> (shift & (3 * 8));
: "r"(a3), "r"(xorval)
: "cc");
#endif
*data = a2 >> (shift & (3 * 8));
return std::pair<size_t, bool>(matched_bytes, true);
} else {
matched = 8;
Expand All @@ -194,16 +271,17 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
*data = UNALIGNED_LOAD64(s2 + matched_bytes);
a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
: "r"(a3), "r"(xorval));
*data = a2 >> (shift & (3 * 8));
: "r"(a3), "r"(xorval)
: "cc");
#endif
*data = a2 >> (shift & (3 * 8));
matched += matched_bytes;
assert(matched >= 8);
return std::pair<size_t, bool>(matched, false);
Expand Down Expand Up @@ -274,7 +352,8 @@ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual o
// because of efficiency reasons:
// (1) Extracting a byte is faster than a bit-field
// (2) It properly aligns copy offset so we do not need a <<8
static const uint16_t char_table[256] = {
static constexpr uint16_t char_table[256] = {
// clang-format off
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
Expand Down Expand Up @@ -306,7 +385,8 @@ static const uint16_t char_table[256] = {
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040,
// clang-format on
};

} // end namespace internal
Expand Down
22 changes: 11 additions & 11 deletions snappy/snappy-sinksource.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ class Source {
class ByteArraySource : public Source {
public:
ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
virtual ~ByteArraySource();
virtual size_t Available() const;
virtual const char* Peek(size_t* len);
virtual void Skip(size_t n);
~ByteArraySource() override;
size_t Available() const override;
const char* Peek(size_t* len) override;
void Skip(size_t n) override;
private:
const char* ptr_;
size_t left_;
Expand All @@ -159,15 +159,15 @@ class ByteArraySource : public Source {
class UncheckedByteArraySink : public Sink {
public:
explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
virtual ~UncheckedByteArraySink();
virtual void Append(const char* data, size_t n);
virtual char* GetAppendBuffer(size_t len, char* scratch);
virtual char* GetAppendBufferVariable(
~UncheckedByteArraySink() override;
void Append(const char* data, size_t n) override;
char* GetAppendBuffer(size_t len, char* scratch) override;
char* GetAppendBufferVariable(
size_t min_size, size_t desired_size_hint, char* scratch,
size_t scratch_size, size_t* allocated_size);
virtual void AppendAndTakeOwnership(
size_t scratch_size, size_t* allocated_size) override;
void AppendAndTakeOwnership(
char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
void *deleter_arg);
void *deleter_arg) override;

// Return the current output pointer so that a caller can see how
// many bytes were produced.
Expand Down
Loading