Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Use offsets between blocks #724

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func (b *blockEnc) init() {
b.coders.llPrev = &fseEncoder{}
}
b.litEnc = &huff0.Scratch{WantLogLess: 4}
b.recentOffsets = [3]uint32{1, 4, 8}
b.reset(nil)
}

Expand All @@ -90,14 +91,11 @@ func (b *blockEnc) reset(prev *blockEnc) {
b.output = b.output[:0]
b.last = false
if prev != nil {
b.recentOffsets = prev.prevRecentOffsets
b.recentOffsets = prev.recentOffsets
}
b.dictLitEnc = nil
}

// swapEncoders swaps the encoder state of this block with prev:
// the sequence coders (b.coders) and the literal encoder (b.litEnc).
func (b *blockEnc) swapEncoders(prev *blockEnc) {
b.coders.swap(&prev.coders)
b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
Expand Down
39 changes: 24 additions & 15 deletions zstd/enc_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {

encodeLoop:
for {
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2

if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && offset1 == 0 {
panic("offset0 was 0")
}

Expand Down Expand Up @@ -216,25 +213,40 @@ encodeLoop:
return m
}

// matchAtSafe evaluates a match candidate at position s against the data at
// offset, and will also check for invalid offsets.
//
// first is the 4-byte value expected at offset and rep is stored in the
// returned match as its rep field.
//
// A candidate is rejected, returning a match with only s set and est set to
// highScore, when offset is negative, when the distance s-offset is at or
// beyond e.maxMatchOff, or when the 4 bytes loaded at offset differ from first.
matchAtSafe := func(offset int32, s int32, first uint32, rep int32) match {
if offset < 0 || s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
}
if debugAsserts {
// Sanity check: the 4 bytes at s and at offset must be identical here.
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// The first 4 bytes are already known to match; extend the match from there.
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
}

m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))

if canRepeat && best.length < goodEnough {
if best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
m1 := matchAtSafe(spp-offset1, spp, cv32, 1)
m2 := matchAtSafe(spp-offset2, spp, cv32, 2)
m3 := matchAtSafe(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
m1 := matchAtSafe(spp-offset1, spp, cv32, 1)
m2 := matchAtSafe(spp-offset2, spp, cv32, 2)
m3 := matchAtSafe(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
}
}
Expand Down Expand Up @@ -426,14 +438,11 @@ encodeLoop:
}

cv = load6432(src, s)
if !canRepeat {
continue
}

// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
if o2 < 0 || load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
Expand Down
25 changes: 10 additions & 15 deletions zstd/enc_better.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,10 @@ encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32

for {
if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && offset1 == 0 {
panic("offset0 was 0")
}

Expand All @@ -163,7 +162,7 @@ encodeLoop:
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

if canRepeat {
if true {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
Expand Down Expand Up @@ -461,7 +460,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && int(offset1) > len(src) {
panic("invalid offset")
}

Expand Down Expand Up @@ -510,14 +509,11 @@ encodeLoop:
}

cv = load6432(src, s)
if !canRepeat {
continue
}

// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
if o2 < 0 || load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
Expand Down Expand Up @@ -671,11 +667,10 @@ encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32

for {
if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && offset1 == 0 {
panic("offset0 was 0")
}

Expand All @@ -692,7 +687,7 @@ encodeLoop:
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)

if canRepeat {
if true {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
Expand Down Expand Up @@ -987,7 +982,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && int(offset1) > len(src) {
panic("invalid offset")
}

Expand Down Expand Up @@ -1039,13 +1034,13 @@ encodeLoop:
}

cv = load6432(src, s)
if !canRepeat {
continue
}

// Check offset 2
for {
o2 := s - offset2
if o2 < 0 {
break
}
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
Expand Down
22 changes: 6 additions & 16 deletions zstd/enc_dfast.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,9 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2

for {
if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && offset1 == 0 {
panic("offset0 was 0")
}

Expand All @@ -134,7 +132,7 @@ encodeLoop:
e.longTable[nextHashL] = entry
e.table[nextHashS] = entry

if canRepeat {
if true {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
Expand Down Expand Up @@ -258,7 +256,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && len(blk.sequences) > 2 && int(offset1) > len(src) {
panic("invalid offset")
}

Expand Down Expand Up @@ -316,14 +314,10 @@ encodeLoop:

cv = load6432(src, s)

if !canRepeat {
continue
}

// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
if o2 < 0 || load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
Expand Down Expand Up @@ -446,7 +440,7 @@ encodeLoop:
e.longTable[nextHashL] = entry
e.table[nextHashS] = entry

if len(blk.sequences) > 2 {
if repIndex >= 0 {
if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
Expand Down Expand Up @@ -626,14 +620,10 @@ encodeLoop:

cv = load6432(src, s)

if len(blk.sequences) <= 2 {
continue
}

// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
if o2 < 0 || load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
Expand Down
45 changes: 27 additions & 18 deletions zstd/enc_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])

if debugAsserts && offset1 == 0 {
panic(offset1)
}
if debugAsserts && offset2 == 0 {
panic(offset2)
}

addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
Expand All @@ -112,12 +119,8 @@ encodeLoop:
// When exiting the search loop, we have already checked 4 bytes.
var t int32

// We will not use repeat offsets across blocks.
// By not using them for the first 3 matches
canRepeat := len(blk.sequences) > 2

for {
if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && len(blk.sequences) > 1 && offset1 == 0 {
panic("offset0 was 0")
}

Expand All @@ -130,7 +133,10 @@ encodeLoop:
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
if debugAsserts && repIndex == s+2 {
panic(offset1)
}
// Consider history as well.
var seq seq
var length int32
Expand Down Expand Up @@ -216,7 +222,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && len(blk.sequences) > 1 && int(offset1) > len(src) {
panic("invalid offset")
}

Expand Down Expand Up @@ -255,7 +261,7 @@ encodeLoop:
cv = load6432(src, s)

// Check offset 2
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
Expand Down Expand Up @@ -344,6 +350,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])

if offset1 == 0 {
panic(offset1)
}
if offset2 == 0 {
panic(offset2)
}

addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
Expand Down Expand Up @@ -374,7 +387,7 @@ encodeLoop:
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
length := 4 + e.matchlen(s+6, repIndex+4, src)
Expand Down Expand Up @@ -501,7 +514,7 @@ encodeLoop:
cv = load6432(src, s)

// Check offset 2
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
Expand Down Expand Up @@ -622,12 +635,8 @@ encodeLoop:
// When exiting the search loop, we have already checked 4 bytes.
var t int32

// We will not use repeat offsets across blocks.
// By not using them for the first 3 matches
canRepeat := len(blk.sequences) > 2

for {
if debugAsserts && canRepeat && offset1 == 0 {
if debugAsserts && len(blk.sequences) > 0 && offset1 == 0 {
panic("offset0 was 0")
}

Expand All @@ -642,7 +651,7 @@ encodeLoop:
e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
e.markShardDirty(nextHash2)

if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
var length int32
Expand Down Expand Up @@ -729,7 +738,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && len(blk.sequences) > 0 && int(offset1) > len(src) {
panic("invalid offset")
}

Expand Down Expand Up @@ -768,7 +777,7 @@ encodeLoop:
cv = load6432(src, s)

// Check offset 2
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
Expand Down
Loading