Merge pull request #367 from nmslib/develop

Update master to 0.6.1
nmslib · Feb 6, 2022 · 21e20f3 · 21e20f3
2 parents 14cabd0 + 2ebbc2c
commit 21e20f3
Show file tree

Hide file tree

Showing 9 changed files with 228 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -3,7 +3,14 @@ Header-only C++ HNSW implementation with python bindings.
 
 **NEWS:**
 
-**version 0.6** 
+
+**version 0.6.1** 
+
+* Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are now backwards-compatible with older SSE and non-AVX512 architectures. 
+* Thanks to ([@psobot](https://github.com/psobot)) there is now a sensible message instead of a segfault when passing a scalar to get_items.
+* Thanks to ([@urigoren](https://github.com/urigoren)) hnswlib has a lazy index creation python wrapper.
+
+**version 0.6.0** 
 * Thanks to ([@dyashuni](https://github.com/dyashuni)) hnswlib now uses github actions for CI, there is a search speedup in some scenarios with deletions. `unmark_deleted(label)` is now also a part of the python interface (note now it throws an exception for double deletions). 
 * Thanks to ([@slice4e](https://github.com/slice4e)) we now support AVX512; thanks to ([@LTLA](https://github.com/LTLA)) the cmake interface for the lib is now updated. 
 * Thanks to ([@alonre24](https://github.com/alonre24)) we now have python bindings for brute-force (and examples for recall tuning: [TESTING_RECALL.md](TESTING_RECALL.md)). 

diff --git a/examples/git_tester.py b/examples/git_tester.py
@@ -1,16 +1,34 @@
 from pydriller import Repository
 import os 
 import datetime
-os.system("cp examples/speedtest.py examples/speedtest2.py")
-for commit in Repository('.', from_tag="v0.5.2").traverse_commits():
-    print(commit.hash)
-    print(commit.msg)
+# Work on a copy of the benchmark script; per the inline note the copy must not
+# be tracked by git (presumably so the checkouts below leave it untouched).
+os.system("cp examples/speedtest.py examples/speedtest2.py") # the file has to be outside of git
+# First pass: only list the commits starting at tag v0.6.0 (index, hash, message).
+for idx, commit in enumerate(Repository('.', from_tag="v0.6.0").traverse_commits()):    
+    # Flatten the commit message to a single line.
+    name=commit.msg.replace('\n', ' ').replace('\r', ' ')
+    print(idx, commit.hash, name)
+
+
+
+# Second pass: check out each commit, rebuild the package and run the speed tests.
+for commit in Repository('.', from_tag="v0.6.0").traverse_commits():
+
+    # Flatten the commit message to a single line; it is passed to the benchmark via -n.
+    name=commit.msg.replace('\n', ' ').replace('\r', ' ')
+    print(commit.hash, name)
 
     os.system(f"git checkout {commit.hash}; rm -rf build; ")
-    os.system("python -m pip install .")
-    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 4 -t 1')
-    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 64 -t 1')
-    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 128 -t 1')
-    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 4 -t 24')
-    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 128 -t 24')
+    print("\n\n--------------------\n\n")
+    # os.system returns the exit status of the command; non-zero is treated as a failed build.
+    ret=os.system("python -m pip install .")
+    print(ret)
+
+    if ret != 0:
+        print ("build failed!!!!")
+        print ("build failed!!!!")
+        print ("build failed!!!!")
+        print ("build failed!!!!")
+        # Skip the benchmarks for a commit that could not be installed.
+        continue    
+
+    # Run the benchmark copy with several -d / -t combinations
+    # (presumably vector dimension and thread count -- confirm in speedtest.py).
+    # NOTE(review): `name` is interpolated into a shell command inside double
+    # quotes; a commit message containing a double quote would break the command.
+    os.system(f'python examples/speedtest2.py -n "{name}" -d 4 -t 1')
+    os.system(f'python examples/speedtest2.py -n "{name}" -d 64 -t 1')
+    os.system(f'python examples/speedtest2.py -n "{name}" -d 128 -t 1')
+    os.system(f'python examples/speedtest2.py -n "{name}" -d 4 -t 24')
+    os.system(f'python examples/speedtest2.py -n "{name}" -d 128 -t 24')
+
 
diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h
@@ -15,8 +15,25 @@
 #ifdef _MSC_VER
 #include <intrin.h>
 #include <stdexcept>
+#include "cpu_x86.h"
+// MSVC wrapper around the __cpuidex intrinsic: queries CPUID leaf `eax`,
+// sub-leaf `ecx`, and writes the four result registers into out[0..3].
+// It is a free function named `cpuid` because AVXCapable() and
+// AVX512Capable() call `cpuid(...)` unqualified; `static inline` keeps the
+// definition safe when this header is included from several translation units.
+// NOTE(review): this no longer defines cpu_x86::cpuid, so the preceding
+// include of "cpu_x86.h" looks unnecessary -- confirm and drop it.
+static inline void cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
+    __cpuidex(out, eax, ecx);
+}
+// MSVC wrapper around the _xgetbv intrinsic: returns the value of the
+// extended control register selected by `x`.
+// `static inline` avoids duplicate-symbol link errors when this header-only
+// library is included from more than one translation unit.
+static inline __int64 xgetbv(unsigned int x) {
+    return _xgetbv(x);
+}
 #else
 #include <x86intrin.h>
+#include <cpuid.h>
+#include <stdint.h>
+// GCC/Clang wrapper around the __cpuid_count macro from <cpuid.h>: queries
+// CPUID leaf `eax`, sub-leaf `ecx`, and stores the result registers in
+// cpuInfo[0..3].
+// `static inline` avoids duplicate-symbol link errors when this header-only
+// library is included from more than one translation unit.
+static inline void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
+    __cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
+}
+// Executes the XGETBV instruction for the extended control register selected
+// by `index` and returns the 64-bit value assembled from EDX (high half) and
+// EAX (low half).
+// `static inline` avoids duplicate-symbol link errors when this header-only
+// library is included from more than one translation unit.
+static inline uint64_t xgetbv(unsigned int index) {
+    uint32_t eax, edx;
+    __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
+    return ((uint64_t)edx << 32) | eax;
+}
 #endif
 
 #if defined(USE_AVX512)
@@ -30,6 +47,65 @@
 #define PORTABLE_ALIGN32 __declspec(align(32))
 #define PORTABLE_ALIGN64 __declspec(align(64))
 #endif
+
+// Adapted from https://github.com/Mysticial/FeatureDetector
+#define _XCR_XFEATURE_ENABLED_MASK  0
+
+// Returns true when AVX code can be executed here: the CPU reports AVX, the OS
+// uses XSAVE/XRSTOR, and XCR0 shows that XMM and YMM state is enabled.
+// Declared `static inline` so that including this header-only library from
+// several translation units does not produce duplicate symbols.
+static inline bool AVXCapable() {
+    int cpuInfo[4];
+
+    // Leaf 0: EAX is the highest supported standard CPUID leaf.
+    cpuid(cpuInfo, 0, 0);
+    if (cpuInfo[0] < 0x00000001)
+        return false;
+
+    // Leaf 1, ECX: bit 27 = OS uses XSAVE/XRSTOR, bit 28 = CPU supports AVX.
+    cpuid(cpuInfo, 0x00000001, 0);
+    const bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
+    const bool cpuAVXSupport = (cpuInfo[2] & (1 << 28)) != 0;
+    if (!osUsesXSAVE_XRSTORE || !cpuAVXSupport)
+        return false;
+
+    // XGETBV is only executed once the XSAVE bit is confirmed.
+    // Mask 0x6 = XCR0 bits 1 and 2 (XMM and YMM state).
+    const uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+    return (xcrFeatureMask & 0x6) == 0x6;
+}
+
+// Returns true when AVX-512 Foundation code can be executed here: everything
+// AVXCapable() requires, plus the AVX512F CPUID bit and the additional XCR0
+// state bits covered by mask 0xe6.
+// Declared `static inline` so that including this header-only library from
+// several translation units does not produce duplicate symbols.
+static inline bool AVX512Capable() {
+    // AVXCapable() has already verified the leaf-1 XSAVE and AVX bits, so
+    // they are not re-checked here.
+    if (!AVXCapable()) return false;
+
+    int cpuInfo[4];
+
+    // Leaf 0: EAX is the highest supported standard CPUID leaf.
+    cpuid(cpuInfo, 0, 0);
+    if (cpuInfo[0] < 0x00000007)
+        return false;
+
+    // Leaf 7, sub-leaf 0, EBX bit 16: AVX512 Foundation.
+    cpuid(cpuInfo, 0x00000007, 0);
+    if ((cpuInfo[1] & (1 << 16)) == 0)
+        return false;
+
+    // Mask 0xe6 = XCR0 bits 1, 2 (XMM, YMM) and 5, 6, 7 (AVX-512 state).
+    const uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+    return (xcrFeatureMask & 0xe6) == 0xe6;
+}
 #endif
 
 #include <queue>
@@ -108,7 +184,6 @@ namespace hnswlib {
 
         return result;
     }
-
 }
 
 #include "space_l2.h"

diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h
@@ -18,7 +18,7 @@ namespace hnswlib {
 
 // Favor using AVX if available.
     static float
-    InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -64,10 +64,12 @@ namespace hnswlib {
         return 1.0f - sum;
 }
 
-#elif defined(USE_SSE)
+#endif
+
+#if defined(USE_SSE)
 
     static float
-    InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -128,7 +130,7 @@ namespace hnswlib {
 #if defined(USE_AVX512)
 
     static float
-    InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN64 TmpRes[16];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -157,10 +159,12 @@ namespace hnswlib {
         return 1.0f - sum;
     }
 
-#elif defined(USE_AVX)
+#endif
+
+#if defined(USE_AVX)
 
     static float
-    InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -195,10 +199,12 @@ namespace hnswlib {
         return 1.0f - sum;
     }
 
-#elif defined(USE_SSE)
+#endif
+
+#if defined(USE_SSE)
 
       static float
-      InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+      InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -245,6 +251,9 @@ namespace hnswlib {
 #endif
 
 #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
+    // Runtime-selected inner-product kernels. They start out pointing at the
+    // SSE versions; the InnerProductSpace constructor re-points them at the
+    // AVX / AVX512 versions when AVXCapable() / AVX512Capable() allow it.
+    // `static` gives each translation unit its own copy, so including this
+    // header more than once per program does not produce duplicate symbols.
+    static DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
+    static DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
+
     static float
     InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         size_t qty = *((size_t *) qty_ptr);
@@ -283,6 +292,20 @@ namespace hnswlib {
         InnerProductSpace(size_t dim) {
             fstdistfunc_ = InnerProduct;
     #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
+        #if defined(USE_AVX512)
+            if (AVX512Capable())
+                InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
+            else if (AVXCapable())
+                InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
+        #elif defined(USE_AVX)
+            if (AVXCapable())
+                InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
+        #endif
+        #if defined(USE_AVX)
+            if (AVXCapable())
+                InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
+        #endif
+
             if (dim % 16 == 0)
                 fstdistfunc_ = InnerProductSIMD16Ext;
             else if (dim % 4 == 0)

diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h
@@ -23,7 +23,7 @@ namespace hnswlib {
 
     // Favor using AVX512 if available.
     static float
-    L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    L2SqrSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
         size_t qty = *((size_t *) qty_ptr);
@@ -52,12 +52,13 @@ namespace hnswlib {
 
         return (res);
 }
+#endif
 
-#elif defined(USE_AVX)
+#if defined(USE_AVX)
 
     // Favor using AVX if available.
     static float
-    L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    L2SqrSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
         size_t qty = *((size_t *) qty_ptr);
@@ -89,10 +90,12 @@ namespace hnswlib {
         return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
     }
 
-#elif defined(USE_SSE)
+#endif
+
+#if defined(USE_SSE)
 
     static float
-    L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    L2SqrSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
         size_t qty = *((size_t *) qty_ptr);
@@ -141,6 +144,8 @@ namespace hnswlib {
 #endif
 
 #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
+    // Runtime-selected squared-L2 kernel. It starts out pointing at the SSE
+    // version; the L2Space constructor re-points it at the AVX / AVX512 version
+    // when AVXCapable() / AVX512Capable() allow it.
+    // `static` gives each translation unit its own copy, so including this
+    // header more than once per program does not produce duplicate symbols.
+    static DISTFUNC<float> L2SqrSIMD16Ext = L2SqrSIMD16ExtSSE;
+
     static float
     L2SqrSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         size_t qty = *((size_t *) qty_ptr);
@@ -156,7 +161,7 @@ namespace hnswlib {
 #endif
 
 
-#ifdef USE_SSE
+#if defined(USE_SSE)
     static float
     L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
@@ -208,7 +213,17 @@ namespace hnswlib {
     public:
         L2Space(size_t dim) {
             fstdistfunc_ = L2Sqr;
-        #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
+    #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
+        #if defined(USE_AVX512)
+            if (AVX512Capable())
+                L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX512;
+            else if (AVXCapable())
+                L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
+        #elif defined(USE_AVX)
+            if (AVXCapable())
+                L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
+        #endif
+
             if (dim % 16 == 0)
                 fstdistfunc_ = L2SqrSIMD16Ext;
             else if (dim % 4 == 0)
@@ -217,7 +232,7 @@ namespace hnswlib {
                 fstdistfunc_ = L2SqrSIMD16ExtResiduals;
             else if (dim > 4)
                 fstdistfunc_ = L2SqrSIMD4ExtResiduals;
-        #endif
+    #endif
             dim_ = dim;
             data_size_ = dim * sizeof(float);
         }

diff --git a/python_bindings/LazyIndex.py b/python_bindings/LazyIndex.py
@@ -0,0 +1,44 @@
+import hnswlib
+"""
+    A python wrapper for lazy indexing, preserves the same api as hnswlib.Index but initializes the index only after adding items for the first time with `add_items`.
+"""
+class LazyIndex(hnswlib.Index):
+    """Variant of ``hnswlib.Index`` that postpones ``init_index``.
+
+    The constructor only records the construction parameters. The underlying
+    index is initialized on the first ``add_items`` call, or earlier through an
+    explicit ``init_index`` / ``resize_index``. Throughout the class,
+    ``self.max_elements == 0`` is treated as "not initialized yet".
+    """
+
+    def __init__(self, space, dim, max_elements=1024, ef_construction=200, M=16):
+        """Store the parameters to use when the index is eventually initialized."""
+        super().__init__(space, dim)
+        self.init_max_elements = max_elements
+        self.init_ef_construction = ef_construction
+        self.init_M = M
+        # Query-time ef requested through set_ef() before initialization.
+        self.init_ef = None
+
+    def init_index(self, max_elements=0, M=0, ef_construction=0):
+        """Initialize the index; positive arguments override the stored values."""
+        if max_elements > 0:
+            self.init_max_elements = max_elements
+        if ef_construction > 0:
+            self.init_ef_construction = ef_construction
+        if M > 0:
+            self.init_M = M
+        super().init_index(self.init_max_elements, self.init_M, self.init_ef_construction)
+        # Apply a query-time ef that was requested while the index did not exist.
+        pending_ef = getattr(self, "init_ef", None)
+        if pending_ef is not None:
+            super().set_ef(pending_ef)
+            self.init_ef = None
+
+    def add_items(self, data, ids=None, num_threads=-1):
+        """Initialize the index on first use, then add the items."""
+        if self.max_elements == 0:
+            self.init_index()
+        return super().add_items(data, ids, num_threads)
+
+    def get_items(self, ids=None):
+        """Return ``[]`` while uninitialized; otherwise delegate to the base class."""
+        if self.max_elements == 0:
+            return []
+        return super().get_items(ids)
+
+    def knn_query(self, data, k=1, num_threads=-1):
+        """Return ``([], [])`` while uninitialized; otherwise delegate to the base class."""
+        if self.max_elements == 0:
+            return [], []
+        return super().knn_query(data, k, num_threads)
+
+    def resize_index(self, size):
+        """Resize the index, or initialize it with ``size`` elements if uninitialized."""
+        if self.max_elements == 0:
+            return self.init_index(size)
+        return super().resize_index(size)
+
+    def set_ef(self, ef):
+        """Set the query-time ``ef``.
+
+        Before initialization the value is remembered and applied by
+        ``init_index``. It is kept separate from ``init_ef_construction``,
+        which is the build-time parameter passed to ``init_index``.
+        """
+        if self.max_elements == 0:
+            self.init_ef = ef
+            return
+        super().set_ef(ef)
+
+    def get_max_elements(self):
+        """Return ``self.max_elements``."""
+        return self.max_elements
+
+    def get_current_count(self):
+        """Return ``self.element_count``."""
+        return self.element_count