From 32680008438da632ceb0a9f9bc335c9b27fafc28 Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Thu, 12 Sep 2024 16:37:47 +0530 Subject: [PATCH] Handle zero chunk size The existing implementation of the method getChunkSize(...) (uint64, error), in some cases, can return chunkSize==0 and err==nil. The onus is on the caller to check for such possibility and handle it properly. Callers often use the returned chunkSize as a divisor and a zero chunkSize lead to panic. see https://github.com/blevesearch/zapx/issues/209 This PR intends to update the method implementation to always return an error in case the returned chunkSize value is 0. That way caller need to only worry about error being non-nil. Callers which are ok with 0 chunkSize can check the returned error against ErrChunkSizeZero --- chunk.go | 14 ++++++++++++++ posting.go | 13 +++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/chunk.go b/chunk.go index 4307d0ed..62c5deaf 100644 --- a/chunk.go +++ b/chunk.go @@ -15,6 +15,7 @@ package zap import ( + "errors" "fmt" ) @@ -26,6 +27,13 @@ var LegacyChunkMode uint32 = 1024 // be used by default. var DefaultChunkMode uint32 = 1026 +var ErrChunkSizeZero = errors.New("chunk size is zero") + +// getChunkSize returns the chunk size for the given chunkMode, cardinality, and +// maxDocs. +// +// In error cases, the returned chunk size will be 0. Caller can differentiate +// between a valid chunk size of 0 and an error by checking for ErrChunkSizeZero. func getChunkSize(chunkMode uint32, cardinality uint64, maxDocs uint64) (uint64, error) { switch { // any chunkMode <= 1024 will always chunk with chunkSize=chunkMode @@ -46,6 +54,9 @@ func getChunkSize(chunkMode uint32, cardinality uint64, maxDocs uint64) (uint64, // chunk-size items. // no attempt is made to tweak any other case if cardinality <= 1024 { + if maxDocs == 0 { + return 0, ErrChunkSizeZero + } return maxDocs, nil } return 1024, nil @@ -61,6 +72,9 @@ func getChunkSize(chunkMode uint32, cardinality uint64, maxDocs uint64) (uint64, // 2. convert to chunkSize, dividing into maxDocs numChunks := (cardinality / 1024) + 1 chunkSize := maxDocs / numChunks + if chunkSize == 0 { + return 0, ErrChunkSizeZero + } return chunkSize, nil } return 0, fmt.Errorf("unknown chunk mode %d", chunkMode) diff --git a/posting.go b/posting.go index ad47df0d..07ae202f 100644 --- a/posting.go +++ b/posting.go @@ -305,10 +305,7 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { chunkSize, err := getChunkSize(d.sb.chunkMode, rv.postings.GetCardinality(), d.sb.numDocs) if err != nil { - return err - } else if chunkSize == 0 { - return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v", - d.sb.chunkMode, d.sb.numDocs) + return fmt.Errorf("failed to get chunk size: %v", err) } rv.chunkSize = chunkSize @@ -632,6 +629,10 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return i.nextDocNumAtOrAfterClean(atOrAfter) } + if i.postings.chunkSize == 0 { + return 0, false, ErrChunkSizeZero + } + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) if !i.Actual.HasNext() || !i.all.HasNext() { @@ -741,6 +742,10 @@ func (i *PostingsIterator) nextDocNumAtOrAfterClean( return uint64(i.Actual.Next()), true, nil } + if i.postings != nil && i.postings.chunkSize == 0 { + return 0, false, ErrChunkSizeZero + } + // freq-norm's needed, so maintain freq-norm chunk reader sameChunkNexts := 0 // # of times we called Next() in the same chunk n := i.Actual.Next()