From 165d9a534e27be3f53903e7ec5c1d8557c0f11ea Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 9 Aug 2023 11:47:31 +0530 Subject: [PATCH] implementing the Count() API for the postings list, updated todos --- build.go | 2 +- section_vector_index.go | 4 ++++ vector_search.go | 12 ++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/build.go b/build.go index d9aadadd..d5686bfd 100644 --- a/build.go +++ b/build.go @@ -24,7 +24,7 @@ import ( "github.com/blevesearch/vellum" ) -const Version uint32 = 16 +const Version uint32 = 15 const Type string = "zap" diff --git a/section_vector_index.go b/section_vector_index.go index d12ce49e..7d7673e8 100644 --- a/section_vector_index.go +++ b/section_vector_index.go @@ -141,6 +141,10 @@ func (vo *vectorIndexOpaque) writeVectorIndexes(w *CountHashWriter) (offset uint // fixme: this can cause a write amplification. need to improve this. // todo: might need to a reformating to optimize according to mmap needs. + // reformatting idea: storing all the ID mappings towards the end of the + // section would help avoid paging in this data as part of a page + // (which is to load non-cacheable info like the index). this could help reduce the + // paging costs for vecID, docIDs := range docIDsMap { // write the vecID _, err := writeUvarints(w, vecID) diff --git a/vector_search.go b/vector_search.go index 2ccb4f10..1b6d0f72 100644 --- a/vector_search.go +++ b/vector_search.go @@ -36,6 +36,9 @@ func (vp *VecPosting) Size() int { // the score is actually a float32 value and in order to store it as a uint32 in // the bitmap, we use the IEEE 754 floating point format. 
type VecPostingsList struct { + // todo: perhaps we don't even need to store a bitmap if there is only + // one similar vector for the query, but rather store it as a field value + // in the struct except *roaring64.Bitmap postings *roaring64.Bitmap } @@ -90,8 +93,13 @@ func (vpl *VecPostingsList) Size() int { return 0 } -func (vpl *VecPostingsList) Count() uint64 { - return 0 +func (p *VecPostingsList) Count() uint64 { + n := p.postings.GetCardinality() + var e uint64 + if p.except != nil { + e = p.postings.AndCardinality(p.except) + } + return n - e } func (vpl *VecPostingsList) ResetBytesRead(val uint64) {