klauspost · klauspost · Jan 2, 2023
diff --git a/zstd/blockenc.go b/zstd/blockenc.go
@@ -69,6 +69,7 @@ func (b *blockEnc) init() {
 		b.coders.llPrev = &fseEncoder{}
 	}
 	b.litEnc = &huff0.Scratch{WantLogLess: 4}
+	b.recentOffsets = [3]uint32{1, 4, 8}
 	b.reset(nil)
 }
 
@@ -90,14 +91,11 @@ func (b *blockEnc) reset(prev *blockEnc) {
 	b.output = b.output[:0]
 	b.last = false
 	if prev != nil {
-		b.recentOffsets = prev.prevRecentOffsets
+		b.recentOffsets = prev.recentOffsets
 	}
 	b.dictLitEnc = nil
 }
 
-// reset will reset the block for a new encode, but in the same stream,
-// meaning that state will be carried over, but the block content is reset.
-// If a previous block is provided, the recent offsets are carried over.
 func (b *blockEnc) swapEncoders(prev *blockEnc) {
 	b.coders.swap(&prev.coders)
 	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc

diff --git a/zstd/enc_best.go b/zstd/enc_best.go
@@ -182,10 +182,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 
 encodeLoop:
 	for {
-		// We allow the encoder to optionally turn off repeat offsets across blocks
-		canRepeat := len(blk.sequences) > 2
-
-		if debugAsserts && canRepeat && offset1 == 0 {
+		if debugAsserts && offset1 == 0 {
 			panic("offset0 was 0")
 		}
 
@@ -216,25 +213,40 @@ encodeLoop:
 			return m
 		}
 
+		// matchAtSafe is the variant of matchAt used for repeat-offset candidates: it rejects a negative offset, or one at least e.maxMatchOff behind s, before reading src.
+		matchAtSafe := func(offset int32, s int32, first uint32, rep int32) match {
+			if offset < 0 || s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+				return match{s: s, est: highScore} // rejected: zero-length match carrying the highScore estimate
+			}
+			if debugAsserts { // debug-only: the 4 bytes at s must really equal the 4 bytes at offset
+				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
+					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+				}
+			}
+			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} // 4 bytes matched via first; matchlen presumably counts the matching bytes after them
+			m.estBits(bitsPerByte) // presumably fills in the cost estimate used to compare candidates - confirm in estBits
+			return m
+		}
+
 		m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
 		m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
 		m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
 		m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
 		best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
 
-		if canRepeat && best.length < goodEnough {
+		if best.length < goodEnough {
 			cv32 := uint32(cv >> 8)
 			spp := s + 1
-			m1 := matchAt(spp-offset1, spp, cv32, 1)
-			m2 := matchAt(spp-offset2, spp, cv32, 2)
-			m3 := matchAt(spp-offset3, spp, cv32, 3)
+			m1 := matchAtSafe(spp-offset1, spp, cv32, 1)
+			m2 := matchAtSafe(spp-offset2, spp, cv32, 2)
+			m3 := matchAtSafe(spp-offset3, spp, cv32, 3)
 			best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
 			if best.length > 0 {
 				cv32 = uint32(cv >> 24)
 				spp += 2
-				m1 := matchAt(spp-offset1, spp, cv32, 1)
-				m2 := matchAt(spp-offset2, spp, cv32, 2)
-				m3 := matchAt(spp-offset3, spp, cv32, 3)
+				m1 := matchAtSafe(spp-offset1, spp, cv32, 1)
+				m2 := matchAtSafe(spp-offset2, spp, cv32, 2)
+				m3 := matchAtSafe(spp-offset3, spp, cv32, 3)
 				best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
 			}
 		}
@@ -426,14 +438,11 @@ encodeLoop:
 		}
 
 		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
 
 		// Check offset 2
 		for {
 			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
+			if o2 < 0 || load3232(src, o2) != uint32(cv) {
 				// Do regular search
 				break
 			}

diff --git a/zstd/enc_better.go b/zstd/enc_better.go
@@ -144,11 +144,9 @@ encodeLoop:
 	for {
 		var t int32
-		// We allow the encoder to optionally turn off repeat offsets across blocks
-		canRepeat := len(blk.sequences) > 2
 		var matched int32
 
 		for {
-			if debugAsserts && canRepeat && offset1 == 0 {
+			if debugAsserts && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -163,7 +162,7 @@ encodeLoop:
 			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
 
-			if canRepeat {
+			if true {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
@@ -461,7 +460,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -510,14 +509,11 @@ encodeLoop:
 		}
 
 		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
 
 		// Check offset 2
 		for {
 			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
+			if o2 < 0 || load3232(src, o2) != uint32(cv) {
 				// Do regular search
 				break
 			}
@@ -671,11 +667,9 @@ encodeLoop:
 	for {
 		var t int32
-		// We allow the encoder to optionally turn off repeat offsets across blocks
-		canRepeat := len(blk.sequences) > 2
 		var matched int32
 
 		for {
-			if debugAsserts && canRepeat && offset1 == 0 {
+			if debugAsserts && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -692,7 +687,7 @@ encodeLoop:
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
 			e.markShortShardDirty(nextHashS)
 
-			if canRepeat {
+			if true {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
@@ -987,7 +982,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -1039,13 +1034,13 @@ encodeLoop:
 		}
 
 		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
 
 		// Check offset 2
 		for {
 			o2 := s - offset2
+			if o2 < 0 {
+				break
+			}
 			if load3232(src, o2) != uint32(cv) {
 				// Do regular search
 				break

diff --git a/zstd/enc_dfast.go b/zstd/enc_dfast.go
@@ -115,11 +115,9 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 encodeLoop:
 	for {
 		var t int32
-		// We allow the encoder to optionally turn off repeat offsets across blocks
-		canRepeat := len(blk.sequences) > 2
 
 		for {
-			if debugAsserts && canRepeat && offset1 == 0 {
+			if debugAsserts && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -134,7 +132,7 @@ encodeLoop:
 			e.longTable[nextHashL] = entry
 			e.table[nextHashS] = entry
 
-			if canRepeat {
+			if true {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
@@ -258,7 +256,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && len(blk.sequences) > 2 && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -316,14 +314,10 @@ encodeLoop:
 
 		cv = load6432(src, s)
 
-		if !canRepeat {
-			continue
-		}
-
 		// Check offset 2
 		for {
 			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
+			if o2 < 0 || load3232(src, o2) != uint32(cv) {
 				// Do regular search
 				break
 			}
@@ -446,7 +440,7 @@ encodeLoop:
 			e.longTable[nextHashL] = entry
 			e.table[nextHashS] = entry
 
-			if len(blk.sequences) > 2 {
+			if repIndex >= 0 {
 				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
@@ -626,14 +620,10 @@ encodeLoop:
 
 		cv = load6432(src, s)
 
-		if len(blk.sequences) <= 2 {
-			continue
-		}
-
 		// Check offset 2
 		for {
 			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
+			if o2 < 0 || load3232(src, o2) != uint32(cv) {
 				// Do regular search
 				break
 			}

diff --git a/zstd/enc_fast.go b/zstd/enc_fast.go
@@ -95,6 +95,13 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	offset1 := int32(blk.recentOffsets[0])
 	offset2 := int32(blk.recentOffsets[1])
 
+	if debugAsserts && offset1 == 0 {
+		panic(offset1)
+	}
+	if debugAsserts && offset2 == 0 {
+		panic(offset2)
+	}
+
 	addLiterals := func(s *seq, until int32) {
 		if until == nextEmit {
 			return
@@ -112,12 +119,8 @@ encodeLoop:
 		// When existing the search loop, we have already checked 4 bytes.
 		var t int32
 
-		// We will not use repeat offsets across blocks.
-		// By not using them for the first 3 matches
-		canRepeat := len(blk.sequences) > 2
-
 		for {
-			if debugAsserts && canRepeat && offset1 == 0 {
+			if debugAsserts && len(blk.sequences) > 1 && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -130,7 +133,10 @@ encodeLoop:
 			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
 
-			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+			if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+				if debugAsserts && repIndex == s+2 {
+					panic(offset1)
+				}
 				// Consider history as well.
 				var seq seq
 				var length int32
@@ -216,7 +222,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && len(blk.sequences) > 1 && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -255,7 +261,7 @@ encodeLoop:
 		cv = load6432(src, s)
 
 		// Check offset 2
-		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+		if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)
@@ -344,6 +350,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	offset1 := int32(blk.recentOffsets[0])
 	offset2 := int32(blk.recentOffsets[1])
 
+	// Debug-only sanity check, guarded by debugAsserts like the equivalent
+	// checks in Encode: a zero repeat offset is an encoder bug, but regular
+	// builds must not panic on (or pay for) this check in the encode path.
+	if debugAsserts && (offset1 == 0 || offset2 == 0) {
+		panic(fmt.Sprintf("zero repeat offset: offset1=%d, offset2=%d", offset1, offset2))
+	}
+
 	addLiterals := func(s *seq, until int32) {
 		if until == nextEmit {
 			return
@@ -374,7 +387,7 @@ encodeLoop:
 			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
 
-			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
+			if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
 				length := 4 + e.matchlen(s+6, repIndex+4, src)
@@ -501,7 +514,7 @@ encodeLoop:
 		cv = load6432(src, s)
 
 		// Check offset 2
-		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
+		if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)
@@ -622,12 +635,8 @@ encodeLoop:
 		// When existing the search loop, we have already checked 4 bytes.
 		var t int32
 
-		// We will not use repeat offsets across blocks.
-		// By not using them for the first 3 matches
-		canRepeat := len(blk.sequences) > 2
-
 		for {
-			if debugAsserts && canRepeat && offset1 == 0 {
+			if debugAsserts && len(blk.sequences) > 0 && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -642,7 +651,7 @@ encodeLoop:
 			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
 			e.markShardDirty(nextHash2)
 
-			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+			if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
 				var length int32
@@ -729,7 +738,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && len(blk.sequences) > 0 && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -768,7 +777,7 @@ encodeLoop:
 		cv = load6432(src, s)
 
 		// Check offset 2
-		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+		if o2 := s - offset2; o2 >= 0 && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)