diff --git a/faiss_vector_posting.go b/faiss_vector_posting.go index b241b42c..0dd2a368 100644 --- a/faiss_vector_posting.go +++ b/faiss_vector_posting.go @@ -387,13 +387,94 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool vectorIDsToInclude = append(vectorIDsToInclude, docVecIDMap[uint32(id)]...) } - scores, ids, err := vecIndex.SearchWithIDs(qVector, k, - vectorIDsToInclude, params) + // Retrieve the mapping of centroid IDs to vectors within + // the cluster. + clusterAssignment, _ := vecIndex.ObtainClusterToVecIDsFromIVFIndex() + // Accounting for a flat index + if len(clusterAssignment) == 0 { + scores, ids, err := vecIndex.SearchWithIDs(qVector, k, + vectorIDsToInclude, params) + if err != nil { + return nil, err + } + + addIDsToPostingsList(rv, ids, scores) + return rv, nil + } + + // Converting to roaring bitmap for ease of intersect ops with + // the set of eligible doc IDs. + centroidVecIDMap := make(map[int64]*roaring.Bitmap) + for centroidID, vecIDs := range clusterAssignment { + if _, exists := centroidVecIDMap[centroidID]; !exists { + centroidVecIDMap[centroidID] = roaring.NewBitmap() + } + for _, vecID := range vecIDs { + centroidVecIDMap[centroidID].Add(uint32(vecID)) + } + } + + // Getting the vector IDs corresponding to the eligible + // doc IDs. + eligibleVecIDsBitmap := roaring.NewBitmap() + for _, eligibleDocID := range eligibleDocIDs { + vecIDs := docVecIDMap[uint32(eligibleDocID)] + for _, vecID := range vecIDs { + eligibleVecIDsBitmap.Add(uint32(vecID)) + } + } + + // Determining which clusters, identified by centroid ID, + // have at least one eligible vector and hence, ought to be + // probed. + eligibleCentroidIDs := make([]int64, 0) + for centroidID, vecIDs := range centroidVecIDMap { + vecIDs.And(eligibleVecIDsBitmap) + if vecIDs.GetCardinality() > 0 { + // The mapping is now reduced to those vectors which + // are also eligible docs for the filter query. + centroidVecIDMap[centroidID] = vecIDs + eligibleCentroidIDs = append(eligibleCentroidIDs, centroidID) + } else { + // don't consider clusters with no eligible IDs. + delete(centroidVecIDMap, centroidID) + } + } + + // Ordering the retrieved centroid IDs by increasing order + // of distance i.e. decreasing order of proximity to query vector. + closestCentroidIDs, centroidDistances, _ := + vecIndex.ObtainClustersWithDistancesFromIVFIndex(qVector, + eligibleCentroidIDs) + + // Getting the nprobe value set at index time. + nprobe := vecIndex.GetNProbe() + + eligibleDocsTillNow := int64(0) + minEligibleCentroids := 0 + for i, centroidID := range closestCentroidIDs { + eligibleDocsTillNow += int64(centroidVecIDMap[centroidID].GetCardinality()) + if eligibleDocsTillNow >= k && i >= int(nprobe-1) { + // Continue till at least 'K' cumulative vectors are + // collected or 'nprobe' clusters are examined, whichever + // comes later. + minEligibleCentroids = i + 1 + break + } + minEligibleCentroids = i + 1 + } + + // Search the clusters specified by 'closestCentroidIDs' for + // vectors whose IDs are present in 'vectorIDsToInclude' + scores, ids, err := vecIndex.SearchClustersFromIVFIndex( + vectorIDsToInclude, closestCentroidIDs, minEligibleCentroids, + k, qVector, centroidDistances, params) if err != nil { return nil, err } addIDsToPostingsList(rv, ids, scores) + return rv, nil } return rv, nil }, diff --git a/go.mod b/go.mod index bae00656..8036a9f6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.21 require ( github.com/RoaringBitmap/roaring v1.9.3 github.com/blevesearch/bleve_index_api v1.1.12 - github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 + github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a github.com/blevesearch/mmap-go v1.0.4 github.com/blevesearch/scorch_segment_api/v2 v2.2.16 github.com/blevesearch/vellum v1.0.10 diff --git a/go.sum b/go.sum index 3b5bb897..e50648f2 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+5kexNy1RXfegY= github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8= -github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 h1:riy8XP3UIBeVjMhsq1r1aGfjvTf3aPp2PuXxdiw9P4s= -github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a h1:mSUfDoOPOLt0OABjiyQq/kQxOzAJmsgIjlAWUPfUDfc= +github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= github.com/blevesearch/scorch_segment_api/v2 v2.2.16 h1:uGvKVvG7zvSxCwcm4/ehBa9cCEuZVE+/zvrSl57QUVY=