diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d2cde965 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +hnswlib.egg-info/ +build/ +dist/ +tmp/ +python_bindings/tests/__pycache__/ +*.pyd +hnswlib.cpython*.so +var/ diff --git a/.travis.yml b/.travis.yml index 6b194926..2c3c9960 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,37 @@ language: python -matrix: +jobs: include: - - python: 3.6 - - python: 3.7 + - name: Linux Python 3.6 + os: linux + python: 3.6 + + - name: Linux Python 3.7 + os: linux + python: 3.7 + + - name: Windows Python 3.6 + os: windows + language: shell # 'language: python' is an error on Travis CI Windows + before_install: + - choco install python --version 3.6.0 + - python -m pip install --upgrade pip + - python --version + env: PATH=/c/Python36:/c/Python36/Scripts:$PATH + + - name: Windows Python 3.7 + os: windows + language: shell # 'language: python' is an error on Travis CI Windows + before_install: + - choco install python --version 3.7.0 + - python -m pip install --upgrade pip + - python --version + env: PATH=/c/Python37:/c/Python37/Scripts:$PATH + install: - | - cd python_bindings - pip install -r requirements.txt - python setup.py install + python -m pip install . script: - | - cd python_bindings - python setup.py test + python -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py" diff --git a/CMakeLists.txt b/CMakeLists.txt index ebee6e6c..31935e0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,4 +23,6 @@ endif() add_executable(test_updates examples/updates_test.cpp) +add_executable(searchKnnCloserFirst_test examples/searchKnnCloserFirst_test.cpp) + target_link_libraries(main sift_test) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..2d71d12e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include hnswlib/*.h +include LICENSE diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..b5e8fda9 --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ +pypi: dist + twine upload dist/* + +dist: + -rm dist/* + pip install build + python3 -m build --sdist + +test: + python3 -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py" + +clean: + rm -rf *.egg-info build dist tmp var tests/__pycache__ hnswlib.cpython*.so + +.PHONY: dist diff --git a/README.md b/README.md index 559c5dfd..8d139fdc 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,12 @@ Header-only C++ HNSW implementation with python bindings. Paper's code for the H **NEWS:** -* **Thanks to Apoorv Sharma [@apoorv-sharma](https://github.com/apoorv-sharma), hnswlib now supports true element updates (the interface remained the same, but when you the perfromance/memory should not degrade as you update the element embeddinds).** -* **Thanks to Dmitry [@2ooom](https://github.com/2ooom), hnswlib got a boost in performance for vector dimensions that are not mutiple of 4** +* **hnswlib is now 0.5.0. Added support for pickling indices, support for PEP-517 and PEP-518 building, small speedups, bug and documentation fixes. 
Many thanks to [@dbespalov](https://github.com/dbespalov), [@dyashuni](https://github.com/dyashuni), [@groodt](https://github.com/groodt), [@uestc-lfs](https://github.com/uestc-lfs), [@vinnitu](https://github.com/vinnitu), [@fabiencastan](https://github.com/fabiencastan), [@JinHai-CN](https://github.com/JinHai-CN), [@js1010](https://github.com/js1010)!**
+
+* **Thanks to Apoorv Sharma [@apoorv-sharma](https://github.com/apoorv-sharma), hnswlib now supports true element updates (the interface remained the same, but the performance/memory should not degrade as you update the element embeddings).**
+
+* **Thanks to Dmitry [@2ooom](https://github.com/2ooom), hnswlib got a boost in performance for vector dimensions that are not a multiple of 4**

 * **Thanks to Louis Abraham ([@louisabraham](https://github.com/louisabraham)) hnswlib can now be installed via pip!**

@@ -37,7 +40,7 @@ For other spaces use the nmslib library https://github.com/nmslib/nmslib.
 #### Short API description
 * `hnswlib.Index(space, dim)` creates a non-initialized HNSW index in space `space` with integer dimension `dim`.

-Index methods:
+`hnswlib.Index` methods:
 * `init_index(max_elements, ef_construction = 200, M = 16, random_seed = 100)` initializes the index with no elements.
     * `max_elements` defines the maximum number of elements that can be stored in the structure (can be increased/shrunk).
     * `ef_construction` defines a construction time/accuracy trade-off (see [ALGO_PARAMS.md](ALGO_PARAMS.md)).
@@ -49,14 +52,14 @@ Index methods:
     * `data_labels` specifies the labels for the data. If the index already has elements with the same labels, their features will be updated. Note that the update procedure is slower than insertion of a new element, but more memory- and query-efficient.
     * Thread-safe with other `add_items` calls, but not with `knn_query`.

-* `mark_deleted(data_label)` - marks the element as deleted, so it will be ommited from search results.
+* `mark_deleted(data_label)` - marks the element as deleted, so it will be omitted from search results.

 * `resize_index(new_size)` - changes the maximum capacity of the index. Not thread safe with `add_items` and `knn_query`.

 * `set_ef(ef)` - sets the query time accuracy/speed trade-off, defined by the `ef` parameter (see [ALGO_PARAMS.md](ALGO_PARAMS.md)). Note that the parameter is currently not saved along with the index, so you need to set it manually after loading.

-* `knn_query(data, k = 1, num_threads = -1)` make a batch query for `k` closests elements for each element of the
+* `knn_query(data, k = 1, num_threads = -1)` makes a batch query for `k` closest elements for each element of the
 `data` (shape: `N*dim`). Returns two numpy arrays (labels and distances), each of shape `N*k`.
     * `num_threads` sets the number of CPU threads to use (-1 means use default).
     * Thread-safe with other `knn_query` calls, but not with `add_items`.

@@ -76,14 +79,34 @@ Index methods:
 * `get_current_count()` - returns the current number of elements stored in the index
-
-
+Read-only properties of the `hnswlib.Index` class (see the example right after this list):
+
+* `space` - name of the space (can be one of "l2", "ip", or "cosine").
+
+* `dim` - dimensionality of the space.
+
+* `M` - parameter that defines the maximum number of outgoing connections in the graph.
+
+* `ef_construction` - parameter that controls the speed/accuracy trade-off during index construction.
+
+* `max_elements` - current capacity of the index. Equivalent to `p.get_max_elements()`.
+
+* `element_count` - number of items in the index. Equivalent to `p.get_current_count()`.
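For instance, the read-only properties mirror the corresponding getter methods (a minimal sketch; the index setup follows the examples below):

```python
import hnswlib

p = hnswlib.Index(space='l2', dim=16)
p.init_index(max_elements=1000, ef_construction=200, M=16)

# Read-only properties and their getter equivalents:
assert p.max_elements == p.get_max_elements()
assert p.element_count == p.get_current_count()
print(p.space, p.dim, p.M, p.ef_construction)
```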
+ +Properties of `hnswlib.Index` that support reading and writing: + +* `ef` - parameter controlling query time/accuracy trade-off. + +* `num_threads` - default number of threads to use in `add_items` or `knn_query`. Note that calling `p.set_num_threads(3)` is equivalent to `p.num_threads=3`. + + #### Python bindings examples ```python import hnswlib import numpy as np +import pickle dim = 128 num_elements = 10000 @@ -95,7 +118,7 @@ data_labels = np.arange(num_elements) # Declaring index p = hnswlib.Index(space = 'l2', dim = dim) # possible options are l2, cosine or ip -# Initing index - the maximum number of elements should be known beforehand +# Initializing index - the maximum number of elements should be known beforehand p.init_index(max_elements = num_elements, ef_construction = 200, M = 16) # Element insertion (can be called several times): @@ -106,6 +129,18 @@ p.set_ef(50) # ef should always be > k # Query dataset, k - number of closest elements (returns 2 numpy arrays) labels, distances = p.knn_query(data, k = 1) + +# Index objects support pickling +# WARNING: serialization via pickle.dumps(p) or p.__getstate__() is NOT thread-safe with p.add_items method! +# Note: ef parameter is included in serialization; random number generator is initialized with random_seed on Index load +p_copy = pickle.loads(pickle.dumps(p)) # creates a copy of index p using pickle round-trip + +### Index parameters are exposed as class properties: +print(f"Parameters passed to constructor: space={p_copy.space}, dim={p_copy.dim}") +print(f"Index construction: M={p_copy.M}, ef_construction={p_copy.ef_construction}") +print(f"Index size is {p_copy.element_count} and index capacity is {p_copy.max_elements}") +print(f"Search speed/quality trade-off parameter: ef={p_copy.ef}") + ``` An example with updates after serialization/deserialization: @@ -126,7 +161,7 @@ data2 = data[num_elements // 2:] # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip -# Initing index +# Initializing index # max_elements - the maximum number of elements (capacity). Will throw an exception if exceeded # during insertion of an element. # The capacity can be increased by saving/loading the index, see below. @@ -160,7 +195,7 @@ print("Saving index to '%s'" % index_path) p.save_index("first_half.bin") del p -# Reiniting, loading the index +# Re-initializing, loading the index p = hnswlib.Index(space='l2', dim=dim) # the space can be changed - keeps the data, alters the distance function. print("\nLoading index from 'first_half.bin'\n") @@ -181,9 +216,9 @@ print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(dat You can install from sources: ```bash apt-get install -y python-setuptools python-pip -pip3 install pybind11 numpy setuptools -cd python_bindings -python3 setup.py install +git clone https://github.com/nmslib/hnswlib.git +cd hnswlib +pip install . 
``` or you can install via pip: @@ -191,7 +226,7 @@ or you can install via pip: ### Other implementations * Non-metric space library (nmslib) - main library(python, C++), supports exotic distances: https://github.com/nmslib/nmslib -* Faiss libary by facebook, uses own HNSW implementation for coarse quantization (python, C++): +* Faiss library by facebook, uses own HNSW implementation for coarse quantization (python, C++): https://github.com/facebookresearch/faiss * Code for the paper ["Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors"](https://arxiv.org/abs/1802.02422) @@ -203,7 +238,8 @@ https://github.com/dbaranchuk/ivf-hnsw * Python implementation (as a part of the clustering code by by Matteo Dell'Amico): https://github.com/matteodellamico/flexible-clustering * Java implementation: https://github.com/jelmerk/hnswlib * Java bindings using Java Native Access: https://github.com/stepstone-tech/hnswlib-jna -* .Net implementation: https://github.com/microsoft/HNSW.Net +* .Net implementation: https://github.com/microsoft/HNSW.Net +* CUDA implementation: https://github.com/js1010/cuhnsw ### Contributing to the repository Contributions are highly welcome! @@ -211,13 +247,15 @@ Contributions are highly welcome! Please make pull requests against the `develop` branch. ### 200M SIFT test reproduction -To download and extract the bigann dataset: +To download and extract the bigann dataset (from root directory): ```bash python3 download_bigann.py ``` To compile: ```bash -cmake . +mkdir build +cd build +cmake .. make all ``` @@ -226,7 +264,7 @@ To run the test on 200M SIFT subset: ./main ``` -The size of the bigann subset (in millions) is controlled by the variable **subset_size_milllions** hardcoded in **sift_1b.cpp**. +The size of the BigANN subset (in millions) is controlled by the variable **subset_size_millions** hardcoded in **sift_1b.cpp**. 
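For example, to test on a smaller subset you would change that constant before compiling (an illustrative sketch; the exact declaration in **sift_1b.cpp** may differ):

```cpp
// In sift_1b.cpp: size of the BigANN subset to load, in millions of vectors.
// The repository default is 200; e.g. set it to 100 for a 100M run.
const size_t subset_size_millions = 100;
```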
### Updates test
To generate testing data (from root directory):
diff --git a/examples/searchKnnCloserFirst_test.cpp b/examples/searchKnnCloserFirst_test.cpp
new file mode 100644
index 00000000..cc1392c8
--- /dev/null
+++ b/examples/searchKnnCloserFirst_test.cpp
@@ -0,0 +1,84 @@
+// This is a test file for testing the interface
+// >>> virtual std::vector<std::pair<dist_t, labeltype>>
+// >>>    searchKnnCloserFirst(const void* query_data, size_t k) const;
+// of class AlgorithmInterface
+
+#include "../hnswlib/hnswlib.h"
+
+#include <assert.h>
+
+#include <vector>
+#include <iostream>
+
+namespace
+{
+
+using idx_t = hnswlib::labeltype;
+
+void test() {
+    int d = 4;
+    idx_t n = 100;
+    idx_t nq = 10;
+    size_t k = 10;
+
+    std::vector<float> data(n * d);
+    std::vector<float> query(nq * d);
+
+    std::mt19937 rng;
+    rng.seed(47);
+    std::uniform_real_distribution<> distrib;
+
+    for (idx_t i = 0; i < n * d; ++i) {
+        data[i] = distrib(rng);
+    }
+    for (idx_t i = 0; i < nq * d; ++i) {
+        query[i] = distrib(rng);
+    }
+
+    hnswlib::L2Space space(d);
+    hnswlib::AlgorithmInterface<float>* alg_brute = new hnswlib::BruteforceSearch<float>(&space, 2 * n);
+    hnswlib::AlgorithmInterface<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * n);
+
+    for (size_t i = 0; i < n; ++i) {
+        alg_brute->addPoint(data.data() + d * i, i);
+        alg_hnsw->addPoint(data.data() + d * i, i);
+    }
+
+    // test searchKnnCloserFirst of BruteforceSearch
+    for (size_t j = 0; j < nq; ++j) {
+        const void* p = query.data() + j * d;
+        auto gd = alg_brute->searchKnn(p, k);
+        auto res = alg_brute->searchKnnCloserFirst(p, k);
+        assert(gd.size() == res.size());
+        size_t t = gd.size();
+        while (!gd.empty()) {
+            assert(gd.top() == res[--t]);
+            gd.pop();
+        }
+    }
+    for (size_t j = 0; j < nq; ++j) {
+        const void* p = query.data() + j * d;
+        auto gd = alg_hnsw->searchKnn(p, k);
+        auto res = alg_hnsw->searchKnnCloserFirst(p, k);
+        assert(gd.size() == res.size());
+        size_t t = gd.size();
+        while (!gd.empty()) {
+            assert(gd.top() == res[--t]);
+            gd.pop();
+        }
+    }
+
+    delete alg_brute;
+    delete alg_hnsw;
+}
+
+} // namespace
+
+int main() {
+    std::cout << "Testing ..."
<< std::endl;
+    test();
+    std::cout << "Test ok" << std::endl;
+
+    return 0;
+}
diff --git a/hnswlib/bruteforce.h b/hnswlib/bruteforce.h
index 5b1bd655..24260400 100644
--- a/hnswlib/bruteforce.h
+++ b/hnswlib/bruteforce.h
@@ -111,24 +111,6 @@ namespace hnswlib {
             return topResults;
         };

-        template <typename Comp>
-        std::vector<std::pair<dist_t, labeltype>>
-        searchKnn(const void* query_data, size_t k, Comp comp) {
-            std::vector<std::pair<dist_t, labeltype>> result;
-            if (cur_element_count == 0) return result;
-
-            auto ret = searchKnn(query_data, k);
-
-            while (!ret.empty()) {
-                result.push_back(ret.top());
-                ret.pop();
-            }
-
-            std::sort(result.begin(), result.end(), comp);
-
-            return result;
-        }
-
         void saveIndex(const std::string &location) {
             std::ofstream output(location, std::ios::binary);
             std::streampos position;
diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h
index 97bdcd18..a2f72dc7 100644
--- a/hnswlib/hnswalg.h
+++ b/hnswlib/hnswalg.h
@@ -5,10 +5,10 @@
 #include <atomic>
 #include <random>
 #include <stdlib.h>
+#include <assert.h>
 #include <unordered_set>
 #include <list>
-
 namespace hnswlib {
     typedef unsigned int tableint;
     typedef unsigned int linklistsizeint;
@@ -26,7 +26,7 @@ namespace hnswlib {
         }

         HierarchicalNSW(SpaceInterface<dist_t> *s, size_t max_elements, size_t M = 16, size_t ef_construction = 200, size_t random_seed = 100) :
-                link_list_locks_(max_elements), element_levels_(max_elements), link_list_update_locks_(max_update_element_locks) {
+                link_list_locks_(max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) {
             max_elements_ = max_elements;

             has_deletions_=false;
@@ -406,7 +406,7 @@
                 top_candidates.pop();
             }

-            tableint next_closest_entry_point = selectedNeighbors[0];
+            tableint next_closest_entry_point = selectedNeighbors.back();

             {
                 linklistsizeint *ll_cur;
@@ -636,7 +636,6 @@
             if (!input.is_open())
                 throw std::runtime_error("Cannot open file");

-            // get file size:
             input.seekg(0,input.end);
             std::streampos total_filesize=input.tellg();
@@ -868,8 +867,8 @@
//                    continue;

                    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>, CompareByFirst> candidates;
-                    int size = sCand.find(neigh) == sCand.end() ? sCand.size() : sCand.size() - 1;
-                    int elementsToKeep = std::min(int(ef_construction_), size);
+                    size_t size = sCand.find(neigh) == sCand.end() ? sCand.size() : sCand.size() - 1; // sCand guaranteed to have size >= 1
+                    size_t elementsToKeep = std::min(ef_construction_, size);
                    for (auto&& cand : sCand) {
                        if (cand == neigh)
                            continue;
@@ -892,7 +891,7 @@
                        std::unique_lock<std::mutex> lock(link_list_locks_[neigh]);
                        linklistsizeint *ll_cur;
                        ll_cur = get_linklist_at_level(neigh, layer);
-                        int candSize = candidates.size();
+                        size_t candSize = candidates.size();
                        setListCount(ll_cur, candSize);
                        tableint *data = (tableint *) (ll_cur + 1);
                        for (size_t idx = 0; idx < candSize; idx++) {
@@ -1136,7 +1135,7 @@
            }

            std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>, CompareByFirst> top_candidates;
-            if (has_deletions_) {
+            if (has_deletions_) {
                top_candidates=searchBaseLayerST<true,true>(
                        currObj, query_data, std::max(ef_, k));
            }
@@ -1156,24 +1155,6 @@
            return result;
        };

-        template <typename Comp>
-        std::vector<std::pair<dist_t, labeltype>>
-        searchKnn(const void* query_data, size_t k, Comp comp) {
-            std::vector<std::pair<dist_t, labeltype>> result;
-            if (cur_element_count == 0) return result;
-
-            auto ret = searchKnn(query_data, k);
-
-            while (!ret.empty()) {
-                result.push_back(ret.top());
-                ret.pop();
-            }
-
-            std::sort(result.begin(), result.end(), comp);
-
-            return result;
-        }
-
        void checkIntegrity(){
            int connections_checked=0;
            std::vector<int> inbound_connections_num(cur_element_count,0);
@@ -1185,19 +1166,19 @@
                std::unordered_set<tableint> s;
                for (int j=0; j<size; j++){
                    assert(data[j] > 0);
-                    assert(data[j] < cur_element_count);
+                    assert(data[j] < cur_element_count);
                    assert (data[j] != i);
                    inbound_connections_num[data[j]]++;
                    s.insert(data[j]);
                    connections_checked++;
-
+
                }
                assert(s.size() == size);
            }
        }
        if(cur_element_count > 1){
            int min1=inbound_connections_num[0], max1=inbound_connections_num[0];
-            for(int i=0; i < cur_element_count; i++){
+            for(int i=0; i < cur_element_count; i++){
                assert(inbound_connections_num[i] > 0);
                min1=std::min(inbound_connections_num[i],min1);
                max1=std::max(inbound_connections_num[i],max1);
@@ -1205,7 +1186,7 @@
            std::cout << "Min inbound: " << min1 << ", Max inbound:" << max1 << "\n";
        }
        std::cout << "integrity ok, checked " << connections_checked << " connections\n";
-
+
    }
};
diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h
index c26f80b5..9409c388 100644
--- a/hnswlib/hnswlib.h
+++ b/hnswlib/hnswlib.h
@@ -71,14 +71,34 @@ namespace hnswlib {
    public:
        virtual void addPoint(const void *datapoint, labeltype label)=0;
        virtual std::priority_queue<std::pair<dist_t, labeltype>> searchKnn(const void *, size_t) const = 0;
-        template <typename Comp>
-        std::vector<std::pair<dist_t, labeltype>> searchKnn(const void*, size_t, Comp) {
-        }
+
+        // Returns the k nearest neighbors in the order of closest first
+        virtual std::vector<std::pair<dist_t, labeltype>>
+            searchKnnCloserFirst(const void* query_data, size_t k) const;
+
        virtual void saveIndex(const std::string &location)=0;
        virtual ~AlgorithmInterface(){
        }
    };

+    template<typename dist_t>
+    std::vector<std::pair<dist_t, labeltype>>
+    AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
+        std::vector<std::pair<dist_t, labeltype>> result;
+
+        // here searchKnn returns the result in the order of farthest first
+        auto ret = searchKnn(query_data, k);
+        {
+            size_t sz = ret.size();
+            result.resize(sz);
+            while (!ret.empty()) {
+                result[--sz] = ret.top();
+                ret.pop();
+            }
+        }
+
+        return result;
+    }
}
diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h
index bc00af72..e86e13b0 100644
--- a/hnswlib/space_l2.h
+++ b/hnswlib/space_l2.h
@@ -204,7 +204,7 @@
    };

    static int
-    L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
+    L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
        size_t
qty = *((size_t *) qty_ptr); int res = 0; @@ -226,12 +226,23 @@ namespace hnswlib { res += ((*a) - (*b)) * ((*a) - (*b)); a++; b++; + } + return (res); + } + static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) { + size_t qty = *((size_t*)qty_ptr); + int res = 0; + unsigned char* a = (unsigned char*)pVect1; + unsigned char* b = (unsigned char*)pVect2; + for(size_t i = 0; i < qty; i++) + { + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; } - return (res); - } class L2SpaceI : public SpaceInterface { @@ -241,7 +252,12 @@ namespace hnswlib { size_t dim_; public: L2SpaceI(size_t dim) { - fstdistfunc_ = L2SqrI; + if(dim % 4 == 0) { + fstdistfunc_ = L2SqrI4x; + } + else { + fstdistfunc_ = L2SqrI; + } dim_ = dim; data_size_ = dim * sizeof(unsigned char); } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e00b3fb8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel", + "numpy>=1.10.0", + "pybind11>=2.0", +] + +build-backend = "setuptools.build_meta" diff --git a/python_bindings/MANIFEST.in b/python_bindings/MANIFEST.in deleted file mode 100644 index 5a480e4f..00000000 --- a/python_bindings/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include hnswlib/*.h \ No newline at end of file diff --git a/python_bindings/Makefile b/python_bindings/Makefile deleted file mode 100644 index 02ec523b..00000000 --- a/python_bindings/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -pypi: dist - twine upload dist/* - -dist: - -rm dist/* - python3 setup.py sdist - -test: - python3 setup.py test - -clean: - rm -rf *.egg-info build dist var first_half.bin tests/__pycache__ hnswlib.cpython-36m-darwin.so - -.PHONY: dist \ No newline at end of file diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index 1b88ca23..87e0c054 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -2,18 +2,21 @@ #include #include #include -#include "hnswlib/hnswlib.h" +#include "hnswlib.h" #include #include +#include +#include namespace py = pybind11; +using namespace pybind11::literals; // needed to bring in _a literal /* * replacement for the openmp '#pragma omp parallel for' directive * only handles a subset of functionality (no reductions etc) * Process ids from start (inclusive) to end (EXCLUSIVE) * - * The method is borrowed from nmslib + * The method is borrowed from nmslib */ template inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { @@ -71,27 +74,58 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn } + inline void assert_true(bool expr, const std::string & msg) { + if (expr == false) + throw std::runtime_error("Unpickle Error: "+msg); + return; + } + + + template class Index { public: - Index(const std::string &space_name, const int dim) : - space_name(space_name), dim(dim) { - normalize=false; - if(space_name=="l2") { - l2space = new hnswlib::L2Space(dim); - } - else if(space_name=="ip") { - l2space = new hnswlib::InnerProductSpace(dim); - } - else if(space_name=="cosine") { - l2space = new hnswlib::InnerProductSpace(dim); - normalize=true; - } - appr_alg = NULL; - ep_added = true; - index_inited = false; - num_threads_default = std::thread::hardware_concurrency(); + Index(const std::string &space_name, const int dim) : + space_name(space_name), dim(dim) { + normalize=false; + if(space_name=="l2") { + l2space = new hnswlib::L2Space(dim); + } + else if(space_name=="ip") { + l2space 
= new hnswlib::InnerProductSpace(dim); + } + else if(space_name=="cosine") { + l2space = new hnswlib::InnerProductSpace(dim); + normalize=true; } + appr_alg = NULL; + ep_added = true; + index_inited = false; + num_threads_default = std::thread::hardware_concurrency(); + + default_ef=10; + } + + static const int ser_version = 1; // serialization version + + std::string space_name; + int dim; + size_t seed; + size_t default_ef; + + bool index_inited; + bool ep_added; + bool normalize; + int num_threads_default; + hnswlib::labeltype cur_l; + hnswlib::HierarchicalNSW *appr_alg; + hnswlib::SpaceInterface *l2space; + + ~Index() { + delete l2space; + if (appr_alg) + delete appr_alg; + } void init_new_index(const size_t maxElements, const size_t M, const size_t efConstruction, const size_t random_seed) { if (appr_alg) { @@ -101,19 +135,17 @@ class Index { appr_alg = new hnswlib::HierarchicalNSW(l2space, maxElements, M, efConstruction, random_seed); index_inited = true; ep_added = false; + appr_alg->ef_ = default_ef; + seed=random_seed; } + void set_ef(size_t ef) { + default_ef=ef; + if (appr_alg) appr_alg->ef_ = ef; } - size_t get_ef_construction() { - return appr_alg->ef_construction_; - } - - size_t get_M() { - return appr_alg->M_; - } void set_num_threads(int num_threads) { this->num_threads_default = num_threads; @@ -124,21 +156,22 @@ class Index { } void loadIndex(const std::string &path_to_index, size_t max_elements) { - if (appr_alg) { - std::cerr<<"Warning: Calling load_index for an already inited index. Old index is being deallocated."; - delete appr_alg; - } - appr_alg = new hnswlib::HierarchicalNSW(l2space, path_to_index, false, max_elements); - cur_l = appr_alg->cur_element_count; + if (appr_alg) { + std::cerr<<"Warning: Calling load_index for an already inited index. Old index is being deallocated."; + delete appr_alg; + } + appr_alg = new hnswlib::HierarchicalNSW(l2space, path_to_index, false, max_elements); + cur_l = appr_alg->cur_element_count; + } + + void normalize_vector(float *data, float *norm_array){ + float norm=0.0f; + for(int i=0;i items(input); @@ -162,7 +195,6 @@ class Index { throw std::runtime_error("wrong dimensionality of the vectors"); // avoid using threads when the number of searches is small: - if(rows<=num_threads*4){ num_threads=1; } @@ -189,20 +221,19 @@ class Index { { - int start = 0; - if (!ep_added) { - size_t id = ids.size() ? ids.at(0) : (cur_l); - float *vector_data=(float *) items.data(0); - std::vector norm_array(dim); - if(normalize){ - normalize_vector(vector_data, norm_array.data()); - vector_data = norm_array.data(); - - } - appr_alg->addPoint((void *) vector_data, (size_t) id); - start = 1; - ep_added = true; + int start = 0; + if (!ep_added) { + size_t id = ids.size() ? ids.at(0) : (cur_l); + float *vector_data=(float *) items.data(0); + std::vector norm_array(dim); + if(normalize){ + normalize_vector(vector_data, norm_array.data()); + vector_data = norm_array.data(); } + appr_alg->addPoint((void *) vector_data, (size_t) id); + start = 1; + ep_added = true; + } py::gil_scoped_release l; if(normalize==false) { @@ -214,7 +245,7 @@ class Index { std::vector norm_array(num_threads * dim); ParallelFor(start, rows, num_threads, [&](size_t row, size_t threadId) { // normalize vector: - size_t start_idx = threadId * dim; + size_t start_idx = threadId * dim; normalize_vector((float *) items.data(row), (norm_array.data()+start_idx)); size_t id = ids.size() ? 
ids.at(row) : (cur_l+row); @@ -254,6 +285,255 @@ class Index { return ids; } + + py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */ + + + + std::unique_lock templock(appr_alg->global); + + unsigned int level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_; + unsigned int link_npy_size = 0; + std::vector link_npy_offsets(appr_alg->cur_element_count); + + for (size_t i = 0; i < appr_alg->cur_element_count; i++){ + unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + link_npy_offsets[i]=link_npy_size; + if (linkListSize) + link_npy_size += linkListSize; + } + + char* data_level0_npy = (char *) malloc(level0_npy_size); + char* link_list_npy = (char *) malloc(link_npy_size); + int* element_levels_npy = (int *) malloc(appr_alg->element_levels_.size()*sizeof(int)); + + hnswlib::labeltype* label_lookup_key_npy = (hnswlib::labeltype *) malloc(appr_alg->label_lookup_.size()*sizeof(hnswlib::labeltype)); + hnswlib::tableint* label_lookup_val_npy = (hnswlib::tableint *) malloc(appr_alg->label_lookup_.size()*sizeof(hnswlib::tableint)); + + memset(label_lookup_key_npy, -1, appr_alg->label_lookup_.size()*sizeof(hnswlib::labeltype)); + memset(label_lookup_val_npy, -1, appr_alg->label_lookup_.size()*sizeof(hnswlib::tableint)); + + size_t idx=0; + for ( auto it = appr_alg->label_lookup_.begin(); it != appr_alg->label_lookup_.end(); ++it ){ + label_lookup_key_npy[idx]= it->first; + label_lookup_val_npy[idx]= it->second; + idx++; + } + + memset(link_list_npy, 0, link_npy_size); + + memcpy(data_level0_npy, appr_alg->data_level0_memory_, level0_npy_size); + memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int)); + + for (size_t i = 0; i < appr_alg->cur_element_count; i++){ + unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? 
appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + if (linkListSize){ + memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize); + } + } + + py::capsule free_when_done_l0(data_level0_npy, [](void *f) { + delete[] f; + }); + py::capsule free_when_done_lvl(element_levels_npy, [](void *f) { + delete[] f; + }); + py::capsule free_when_done_lb(label_lookup_key_npy, [](void *f) { + delete[] f; + }); + py::capsule free_when_done_id(label_lookup_val_npy, [](void *f) { + delete[] f; + }); + py::capsule free_when_done_ll(link_list_npy, [](void *f) { + delete[] f; + }); + + /* TODO: serialize state of random generators appr_alg->level_generator_ and appr_alg->update_probability_generator_ */ + /* for full reproducibility / to avoid re-initializing generators inside Index::createFromParams */ + + return py::dict( + "offset_level0"_a=appr_alg->offsetLevel0_, + "max_elements"_a=appr_alg->max_elements_, + "cur_element_count"_a=appr_alg->cur_element_count, + "size_data_per_element"_a=appr_alg->size_data_per_element_, + "label_offset"_a=appr_alg->label_offset_, + "offset_data"_a=appr_alg->offsetData_, + "max_level"_a=appr_alg->maxlevel_, + "enterpoint_node"_a=appr_alg->enterpoint_node_, + "max_M"_a=appr_alg->maxM_, + "max_M0"_a=appr_alg->maxM0_, + "M"_a=appr_alg->M_, + "mult"_a=appr_alg->mult_, + "ef_construction"_a=appr_alg->ef_construction_, + "ef"_a=appr_alg->ef_, + "has_deletions"_a=appr_alg->has_deletions_, + "size_links_per_element"_a=appr_alg->size_links_per_element_, + + "label_lookup_external"_a=py::array_t( + {appr_alg->label_lookup_.size()}, // shape + {sizeof(hnswlib::labeltype)}, // C-style contiguous strides for double + label_lookup_key_npy, // the data pointer + free_when_done_lb), + + "label_lookup_internal"_a=py::array_t( + {appr_alg->label_lookup_.size()}, // shape + {sizeof(hnswlib::tableint)}, // C-style contiguous strides for double + label_lookup_val_npy, // the data pointer + free_when_done_id), + + "element_levels"_a=py::array_t( + {appr_alg->element_levels_.size()}, // shape + {sizeof(int)}, // C-style contiguous strides for double + element_levels_npy, // the data pointer + free_when_done_lvl), + + // linkLists_,element_levels_,data_level0_memory_ + "data_level0"_a=py::array_t( + {level0_npy_size}, // shape + {sizeof(char)}, // C-style contiguous strides for double + data_level0_npy, // the data pointer + free_when_done_l0), + + "link_lists"_a=py::array_t( + {link_npy_size}, // shape + {sizeof(char)}, // C-style contiguous strides for double + link_list_npy, // the data pointer + free_when_done_ll) + + ); + + + } + + + py::dict getIndexParams() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */ + auto params = py::dict( + "ser_version"_a=py::int_(Index::ser_version), //serialization version + "space"_a=space_name, + "dim"_a=dim, + "index_inited"_a=index_inited, + "ep_added"_a=ep_added, + "normalize"_a=normalize, + "num_threads"_a=num_threads_default, + "seed"_a=seed + ); + + if(index_inited == false) + return py::dict( **params, "ef"_a=default_ef); + + auto ann_params = getAnnData(); + + return py::dict(**params, **ann_params); + } + + + static Index * createFromParams(const py::dict d) { + + // check serialization version + assert_true(((int)py::int_(Index::ser_version)) >= d["ser_version"].cast(), "Invalid serialization version!"); + + auto space_name_=d["space"].cast(); + auto dim_=d["dim"].cast(); + auto index_inited_=d["index_inited"].cast(); + + Index *new_index = new Index(space_name_, dim_); + + 
/* TODO: deserialize state of random generators into new_index->level_generator_ and new_index->update_probability_generator_ */ + /* for full reproducibility / state of generators is serialized inside Index::getIndexParams */ + new_index->seed = d["seed"].cast(); + + if (index_inited_){ + new_index->appr_alg = new hnswlib::HierarchicalNSW(new_index->l2space, d["max_elements"].cast(), d["M"].cast(), d["ef_construction"].cast(), new_index->seed); + new_index->cur_l = d["cur_element_count"].cast(); + } + + new_index->index_inited = index_inited_; + new_index->ep_added=d["ep_added"].cast(); + new_index->num_threads_default=d["num_threads"].cast(); + new_index->default_ef=d["ef"].cast(); + + if (index_inited_) + new_index->setAnnData(d); + + return new_index; + } + + static Index * createFromIndex(const Index & index) { + return createFromParams(index.getIndexParams()); + } + + void setAnnData(const py::dict d) { /* WARNING: Index::setAnnData is not thread-safe with Index::addItems */ + + + std::unique_lock templock(appr_alg->global); + + assert_true(appr_alg->offsetLevel0_ == d["offset_level0"].cast(), "Invalid value of offsetLevel0_ "); + assert_true(appr_alg->max_elements_ == d["max_elements"].cast(), "Invalid value of max_elements_ "); + + appr_alg->cur_element_count = d["cur_element_count"].cast(); + + assert_true(appr_alg->size_data_per_element_ == d["size_data_per_element"].cast(), "Invalid value of size_data_per_element_ "); + assert_true(appr_alg->label_offset_ == d["label_offset"].cast(), "Invalid value of label_offset_ "); + assert_true(appr_alg->offsetData_ == d["offset_data"].cast(), "Invalid value of offsetData_ "); + + appr_alg->maxlevel_ = d["max_level"].cast(); + appr_alg->enterpoint_node_ = d["enterpoint_node"].cast(); + + assert_true(appr_alg->maxM_ == d["max_M"].cast(), "Invalid value of maxM_ "); + assert_true(appr_alg->maxM0_ == d["max_M0"].cast(), "Invalid value of maxM0_ "); + assert_true(appr_alg->M_ == d["M"].cast(), "Invalid value of M_ "); + assert_true(appr_alg->mult_ == d["mult"].cast(), "Invalid value of mult_ "); + assert_true(appr_alg->ef_construction_ == d["ef_construction"].cast(), "Invalid value of ef_construction_ "); + + appr_alg->ef_ = d["ef"].cast(); + appr_alg->has_deletions_=d["has_deletions"].cast(); + + assert_true(appr_alg->size_links_per_element_ == d["size_links_per_element"].cast(), "Invalid value of size_links_per_element_ "); + + auto label_lookup_key_npy = d["label_lookup_external"].cast >(); + auto label_lookup_val_npy = d["label_lookup_internal"].cast >(); + auto element_levels_npy = d["element_levels"].cast >(); + auto data_level0_npy = d["data_level0"].cast >(); + auto link_list_npy = d["link_lists"].cast >(); + + for (size_t i = 0; i < appr_alg->cur_element_count; i++){ + if (label_lookup_val_npy.data()[i] < 0){ + throw std::runtime_error("internal id cannot be negative!"); + } + else{ + appr_alg->label_lookup_.insert(std::make_pair(label_lookup_key_npy.data()[i], label_lookup_val_npy.data()[i])); + } + } + + memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes()); + + unsigned int link_npy_size = 0; + std::vector link_npy_offsets(appr_alg->cur_element_count); + + for (size_t i = 0; i < appr_alg->cur_element_count; i++){ + unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? 
appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + link_npy_offsets[i]=link_npy_size; + if (linkListSize) + link_npy_size += linkListSize; + } + + memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes()); + + for (size_t i = 0; i < appr_alg->max_elements_; i++) { + unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + if (linkListSize == 0) { + appr_alg->linkLists_[i] = nullptr; + } else { + appr_alg->linkLists_[i] = (char *) malloc(linkListSize); + if (appr_alg->linkLists_[i] == nullptr) + throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklist"); + + memcpy(appr_alg->linkLists_[i], link_list_npy.data()+link_npy_offsets[i], linkListSize); + + } + } +} + py::object knnQuery_return_numpy(py::object input, size_t k = 1, int num_threads = -1) { py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); @@ -310,7 +590,7 @@ class Index { float *data= (float *) items.data(row); size_t start_idx = threadId * dim; - normalize_vector((float *) items.data(row), (norm_array.data()+start_idx)); + normalize_vector((float *) items.data(row), (norm_array.data()+start_idx)); std::priority_queue> result = appr_alg->searchKnn( (void *) (norm_array.data()+start_idx), k); @@ -367,50 +647,69 @@ class Index { return appr_alg->cur_element_count; } - std::string space_name; - int dim; - +}; - bool index_inited; - bool ep_added; - bool normalize; - int num_threads_default; - hnswlib::labeltype cur_l; - hnswlib::HierarchicalNSW *appr_alg; - hnswlib::SpaceInterface *l2space; - ~Index() { - delete l2space; - if (appr_alg) - delete appr_alg; - } -}; PYBIND11_PLUGIN(hnswlib) { py::module m("hnswlib"); py::class_>(m, "Index") + .def(py::init(&Index::createFromParams), py::arg("params")) + /* WARNING: Index::createFromIndex is not thread-safe with Index::addItems */ + .def(py::init(&Index::createFromIndex), py::arg("index")) .def(py::init(), py::arg("space"), py::arg("dim")) - .def("init_index", &Index::init_new_index, py::arg("max_elements"), py::arg("M")=16, - py::arg("ef_construction")=200, py::arg("random_seed")=100) + .def("init_index", &Index::init_new_index, py::arg("max_elements"), py::arg("M")=16, py::arg("ef_construction")=200, py::arg("random_seed")=100) .def("knn_query", &Index::knnQuery_return_numpy, py::arg("data"), py::arg("k")=1, py::arg("num_threads")=-1) .def("add_items", &Index::addItems, py::arg("data"), py::arg("ids") = py::none(), py::arg("num_threads")=-1) .def("get_items", &Index::getDataReturnList, py::arg("ids") = py::none()) .def("get_ids_list", &Index::getIdsList) .def("set_ef", &Index::set_ef, py::arg("ef")) - .def("get_ef_construction", &Index::get_ef_construction) - .def("get_M", &Index::get_M) .def("set_num_threads", &Index::set_num_threads, py::arg("num_threads")) .def("save_index", &Index::saveIndex, py::arg("path_to_index")) .def("load_index", &Index::loadIndex, py::arg("path_to_index"), py::arg("max_elements")=0) .def("mark_deleted", &Index::markDeleted, py::arg("label")) .def("resize_index", &Index::resizeIndex, py::arg("new_size")) - .def("get_max_elements", &Index::getMaxElements) - .def("get_current_count", &Index::getCurrentCount) - .def("__repr__", - [](const Index &a) { - return ""; - } - ); + .def_readonly("space", &Index::space_name) + .def_readonly("dim", &Index::dim) + .def_readwrite("num_threads", &Index::num_threads_default) + .def_property("ef", + [](const Index & index) { + return 
index.index_inited ? index.appr_alg->ef_ : index.default_ef;
+          },
+          [](Index<float> & index, const size_t ef_) {
+            index.default_ef=ef_;
+            if (index.appr_alg)
+                index.appr_alg->ef_ = ef_;
+        })
+        .def_property_readonly("max_elements", [](const Index<float> & index) {
+            return index.index_inited ? index.appr_alg->max_elements_ : 0;
+        })
+        .def_property_readonly("element_count", [](const Index<float> & index) {
+            return index.index_inited ? index.appr_alg->cur_element_count : 0;
+        })
+        .def_property_readonly("ef_construction", [](const Index<float> & index) {
+            return index.index_inited ? index.appr_alg->ef_construction_ : 0;
+        })
+        .def_property_readonly("M", [](const Index<float> & index) {
+            return index.index_inited ? index.appr_alg->M_ : 0;
+        })
+
+        .def(py::pickle(
+            [](const Index<float> &ind) { // __getstate__
+                return py::make_tuple(ind.getIndexParams()); /* Return dict (wrapped in a tuple) that fully encodes state of the Index object */
+            },
+            [](py::tuple t) { // __setstate__
+                if (t.size() != 1)
+                    throw std::runtime_error("Invalid state!");
+
+                return Index<float>::createFromParams(t[0].cast<py::dict>());
+            }
+        ))
+
+        .def("__repr__", [](const Index<float> &a) {
+            return "<hnswlib.Index(space='" + a.space_name + "', dim=" + std::to_string(a.dim) + ")>";
+        });
+
        return m.ptr();
}
diff --git a/python_bindings/hnswlib b/python_bindings/hnswlib
deleted file mode 120000
index 236d6575..00000000
--- a/python_bindings/hnswlib
+++ /dev/null
@@ -1 +0,0 @@
-../hnswlib
\ No newline at end of file
diff --git a/python_bindings/requirements.txt b/python_bindings/requirements.txt
deleted file mode 100644
index 81fbf192..00000000
--- a/python_bindings/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy>=1.10.0
-pybind11>=2.0
\ No newline at end of file
diff --git a/python_bindings/setup.py b/python_bindings/setup.py
deleted file mode 100644
index a6dfb81b..00000000
--- a/python_bindings/setup.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-from setuptools import setup, Extension
-from setuptools.command.build_ext import build_ext
-import sys
-import setuptools
-
-__version__ = '0.4.0'
-
-
-source_files = ['bindings.cpp']
-
-libraries = []
-extra_objects = []
-
-
-ext_modules = [
-    Extension(
-        'hnswlib',
-        source_files,
-        # include_dirs=[os.path.join(libdir, "include")],
-        libraries=libraries,
-        language='c++',
-        extra_objects=extra_objects,
-    ),
-]
-
-
-# As of Python 3.6, CCompiler has a `has_flag` method.
-# cf http://bugs.python.org/issue26689
-def has_flag(compiler, flagname):
-    """Return a boolean indicating whether a flag name is supported on
-    the specified compiler.
-    """
-    import tempfile
-    with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
-        f.write('int main (int argc, char **argv) { return 0; }')
-        try:
-            compiler.compile([f.name], extra_postargs=[flagname])
-        except setuptools.distutils.errors.CompileError:
-            return False
-    return True
-
-
-def cpp_flag(compiler):
-    """Return the -std=c++[11/14] compiler flag.
-    The c++14 is prefered over c++11 (when it is available).
- """ - if has_flag(compiler, '-std=c++14'): - return '-std=c++14' - elif has_flag(compiler, '-std=c++11'): - return '-std=c++11' - else: - raise RuntimeError('Unsupported compiler -- at least C++11 support ' - 'is needed!') - - -class BuildExt(build_ext): - """A custom build extension for adding compiler-specific options.""" - c_opts = { - 'msvc': ['/EHsc', '/openmp', '/O2'], - 'unix': ['-O3', '-march=native'], # , '-w' - } - link_opts = { - 'unix': [], - 'msvc': [], - } - - if sys.platform == 'darwin': - c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] - link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] - else: - c_opts['unix'].append("-fopenmp") - link_opts['unix'].extend(['-fopenmp', '-pthread']) - - def build_extensions(self): - ct = self.compiler.compiler_type - opts = self.c_opts.get(ct, []) - if ct == 'unix': - opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) - opts.append(cpp_flag(self.compiler)) - if has_flag(self.compiler, '-fvisibility=hidden'): - opts.append('-fvisibility=hidden') - elif ct == 'msvc': - opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) - - # extend include dirs here (don't assume numpy/pybind11 are installed when first run, since - # pip could have installed them as part of executing this script - import pybind11 - import numpy as np - for ext in self.extensions: - ext.extra_compile_args.extend(opts) - ext.extra_link_args.extend(self.link_opts.get(ct, [])) - ext.include_dirs.extend([ - # Path to pybind11 headers - pybind11.get_include(), - pybind11.get_include(True), - - # Path to numpy headers - np.get_include() - ]) - - build_ext.build_extensions(self) - - -setup( - name='hnswlib', - version=__version__, - description='hnswlib', - author='Yury Malkov and others', - url='https://github.com/yurymalkov/hnsw', - long_description="""hnsw""", - ext_modules=ext_modules, - install_requires=['pybind11>=2.0', 'numpy'], - cmdclass={'build_ext': BuildExt}, - test_suite="tests", - zip_safe=False, -) diff --git a/python_bindings/setup.py b/python_bindings/setup.py new file mode 120000 index 00000000..f8f80fc2 --- /dev/null +++ b/python_bindings/setup.py @@ -0,0 +1 @@ +../setup.py \ No newline at end of file diff --git a/python_bindings/tests/bindings_test.py b/python_bindings/tests/bindings_test.py index afc663af..d718bc3b 100644 --- a/python_bindings/tests/bindings_test.py +++ b/python_bindings/tests/bindings_test.py @@ -1,10 +1,13 @@ +import os import unittest +import numpy as np + +import hnswlib + class RandomSelfTestCase(unittest.TestCase): def testRandomSelf(self): - import hnswlib - import numpy as np dim = 16 num_elements = 10000 @@ -40,19 +43,19 @@ def testRandomSelf(self): # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) - self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3) + self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3) # Serializing and deleting the index: - index_path='first_half.bin' + index_path = 'first_half.bin' print("Saving index to '%s'" % index_path) - p.save_index("first_half.bin") + p.save_index(index_path) del p # Reiniting, loading the index p = hnswlib.Index(space='l2', dim=dim) # you can change the sa - print("\nLoading index from 'first_half.bin'\n") - p.load_index("first_half.bin") + print("\nLoading index from '%s'\n" % index_path) + p.load_index(index_path) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) @@ 
-60,8 +63,6 @@ def testRandomSelf(self): # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) - self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3) - - -if __name__ == "__main__": - unittest.main() \ No newline at end of file + self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3) + + os.remove(index_path) diff --git a/python_bindings/tests/bindings_test_getdata.py b/python_bindings/tests/bindings_test_getdata.py index 3e234518..8655d7f8 100644 --- a/python_bindings/tests/bindings_test_getdata.py +++ b/python_bindings/tests/bindings_test_getdata.py @@ -1,11 +1,13 @@ import unittest +import numpy as np + +import hnswlib + class RandomSelfTestCase(unittest.TestCase): def testGettingItems(self): print("\n**** Getting the data by label test ****\n") - import hnswlib - import numpy as np dim = 16 num_elements = 10000 @@ -42,6 +44,3 @@ def testGettingItems(self): # After adding them, all labels should be retrievable returned_items = p.get_items(labels) self.assertSequenceEqual(data.tolist(), returned_items) - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/python_bindings/tests/bindings_test_labels.py b/python_bindings/tests/bindings_test_labels.py index c1887bef..5c13e198 100644 --- a/python_bindings/tests/bindings_test_labels.py +++ b/python_bindings/tests/bindings_test_labels.py @@ -1,126 +1,127 @@ +import os import unittest +import numpy as np -class RandomSelfTestCase(unittest.TestCase): - def testRandomSelf(self): - for idx in range(16): - print("\n**** Index save-load test ****\n") - import hnswlib - import numpy as np - - np.random.seed(idx) - dim = 16 - num_elements = 10000 - - # Generating sample data - data = np.float32(np.random.random((num_elements, dim))) - - # Declaring index - p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip - - # Initing index - # max_elements - the maximum number of elements, should be known beforehand - # (probably will be made optional in the future) - # - # ef_construction - controls index search speed/build speed tradeoff - # M - is tightly connected with internal dimensionality of the data - # stronlgy affects the memory consumption - - p.init_index(max_elements = num_elements, ef_construction = 100, M = 16) - - # Controlling the recall by setting ef: - # higher ef leads to better accuracy, but slower search - p.set_ef(100) - - p.set_num_threads(4) # by default using all available cores - - # We split the data in two batches: - data1 = data[:num_elements // 2] - data2 = data[num_elements // 2:] - - print("Adding first batch of %d elements" % (len(data1))) - p.add_items(data1) - - # Query the elements for themselves and measure recall: - labels, distances = p.knn_query(data1, k=1) - - items=p.get_items(labels) - - # Check the recall: - self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3) - - # Check that the returned element data is correct: - diff_with_gt_labels=np.mean(np.abs(data1-items)) - self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4) - - # Serializing and deleting the index. - # We need the part to check that serialization is working properly. - - index_path='first_half.bin' - print("Saving index to '%s'" % index_path) - p.save_index("first_half.bin") - print("Saved. 
Deleting...") - del p - print("Deleted") +import hnswlib - print("\n**** Mark delete test ****\n") - # Reiniting, loading the index - print("Reiniting") - p = hnswlib.Index(space='l2', dim=dim) - print("\nLoading index from 'first_half.bin'\n") - p.load_index("first_half.bin") - p.set_ef(100) - - print("Adding the second batch of %d elements" % (len(data2))) - p.add_items(data2) - - # Query the elements for themselves and measure recall: - labels, distances = p.knn_query(data, k=1) - items=p.get_items(labels) - - # Check the recall: - self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3) - - # Check that the returned element data is correct: - diff_with_gt_labels=np.mean(np.abs(data-items)) - self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4) # deleting index. - - # Checking that all labels are returned correctly: - sorted_labels=sorted(p.get_ids_list()) - self.assertEqual(np.sum(~np.asarray(sorted_labels)==np.asarray(range(num_elements))),0) - - # Delete data1 - labels1, _ = p.knn_query(data1, k=1) - - for l in labels1: - p.mark_deleted(l[0]) - labels2, _ = p.knn_query(data2, k=1) - items=p.get_items(labels2) - diff_with_gt_labels=np.mean(np.abs(data2-items)) - self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-3) # console - - - labels1_after, _ = p.knn_query(data1, k=1) - for la in labels1_after: - for lb in labels1: - if la[0] == lb[0]: - self.assertTrue(False) - print("All the data in data1 are removed") - - # checking saving/loading index with elements marked as deleted - p.save_index("with_deleted.bin") - p = hnswlib.Index(space='l2', dim=dim) - p.load_index("with_deleted.bin") - p.set_ef(100) - - labels1_after, _ = p.knn_query(data1, k=1) - for la in labels1_after: - for lb in labels1: - if la[0] == lb[0]: - self.assertTrue(False) +class RandomSelfTestCase(unittest.TestCase): + def testRandomSelf(self): + for idx in range(16): + print("\n**** Index save-load test ****\n") + np.random.seed(idx) + dim = 16 + num_elements = 10000 + # Generating sample data + data = np.float32(np.random.random((num_elements, dim))) -if __name__ == "__main__": - unittest.main() + # Declaring index + p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip + + # Initing index + # max_elements - the maximum number of elements, should be known beforehand + # (probably will be made optional in the future) + # + # ef_construction - controls index search speed/build speed tradeoff + # M - is tightly connected with internal dimensionality of the data + # stronlgy affects the memory consumption + + p.init_index(max_elements=num_elements, ef_construction=100, M=16) + + # Controlling the recall by setting ef: + # higher ef leads to better accuracy, but slower search + p.set_ef(100) + + p.set_num_threads(4) # by default using all available cores + + # We split the data in two batches: + data1 = data[:num_elements // 2] + data2 = data[num_elements // 2:] + + print("Adding first batch of %d elements" % (len(data1))) + p.add_items(data1) + + # Query the elements for themselves and measure recall: + labels, distances = p.knn_query(data1, k=1) + + items=p.get_items(labels) + + # Check the recall: + self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3) + + # Check that the returned element data is correct: + diff_with_gt_labels=np.mean(np.abs(data1-items)) + self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) + + # Serializing and deleting the index. 
+ # We need the part to check that serialization is working properly. + + index_path = 'first_half.bin' + print("Saving index to '%s'" % index_path) + p.save_index(index_path) + print("Saved. Deleting...") + del p + print("Deleted") + + print("\n**** Mark delete test ****\n") + # Reiniting, loading the index + print("Reiniting") + p = hnswlib.Index(space='l2', dim=dim) + + print("\nLoading index from '%s'\n" % index_path) + p.load_index(index_path) + p.set_ef(100) + + print("Adding the second batch of %d elements" % (len(data2))) + p.add_items(data2) + + # Query the elements for themselves and measure recall: + labels, distances = p.knn_query(data, k=1) + items=p.get_items(labels) + + # Check the recall: + self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3) + + # Check that the returned element data is correct: + diff_with_gt_labels=np.mean(np.abs(data-items)) + self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # deleting index. + + # Checking that all labels are returned correctly: + sorted_labels=sorted(p.get_ids_list()) + self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0) + + # Delete data1 + labels1, _ = p.knn_query(data1, k=1) + + for l in labels1: + p.mark_deleted(l[0]) + labels2, _ = p.knn_query(data2, k=1) + items=p.get_items(labels2) + diff_with_gt_labels = np.mean(np.abs(data2-items)) + self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-3) # console + + labels1_after, _ = p.knn_query(data1, k=1) + for la in labels1_after: + for lb in labels1: + if la[0] == lb[0]: + self.assertTrue(False) + print("All the data in data1 are removed") + + # checking saving/loading index with elements marked as deleted + del_index_path = "with_deleted.bin" + p.save_index(del_index_path) + p = hnswlib.Index(space='l2', dim=dim) + p.load_index(del_index_path) + p.set_ef(100) + + labels1_after, _ = p.knn_query(data1, k=1) + for la in labels1_after: + for lb in labels1: + if la[0] == lb[0]: + self.assertTrue(False) + + os.remove(index_path) + os.remove(del_index_path) diff --git a/python_bindings/tests/bindings_test_pickle.py b/python_bindings/tests/bindings_test_pickle.py new file mode 100644 index 00000000..3a42df2e --- /dev/null +++ b/python_bindings/tests/bindings_test_pickle.py @@ -0,0 +1,152 @@ +import pickle +import unittest + +import numpy as np + +import hnswlib + + +def get_dist(metric, pt1, pt2): + if metric == 'l2': + return np.sum((pt1-pt2)**2) + elif metric == 'ip': + return 1. - np.sum(np.multiply(pt1, pt2)) + elif metric == 'cosine': + return 1. 
- np.sum(np.multiply(pt1, pt2)) / (np.sum(pt1**2) * np.sum(pt2**2))**.5 + + +def brute_force_distances(metric, items, query_items, k): + dists = np.zeros((query_items.shape[0], items.shape[0])) + for ii in range(items.shape[0]): + for jj in range(query_items.shape[0]): + dists[jj,ii] = get_dist(metric, items[ii, :], query_items[jj, :]) + + labels = np.argsort(dists, axis=1) # equivalent, but faster: np.argpartition(dists, range(k), axis=1) + dists = np.sort(dists, axis=1) # equivalent, but faster: np.partition(dists, range(k), axis=1) + + return labels[:, :k], dists[:, :k] + + +def check_ann_results(self, metric, items, query_items, k, ann_l, ann_d, err_thresh=0, total_thresh=0, dists_thresh=0): + brute_l, brute_d = brute_force_distances(metric, items, query_items, k) + err_total = 0 + for jj in range(query_items.shape[0]): + err = np.sum(np.isin(brute_l[jj, :], ann_l[jj, :], invert=True)) + if err > 0: + print(f"Warning: {err} labels are missing from ann results (k={k}, err_thresh={err_thresh})") + + if err > err_thresh: + err_total += 1 + + self.assertLessEqual(err_total, total_thresh, f"Error: knn_query returned incorrect labels for {err_total} items (k={k})") + + wrong_dists = np.sum(((brute_d - ann_d)**2.) > 1e-3) + if wrong_dists > 0: + dists_count = brute_d.shape[0]*brute_d.shape[1] + print(f"Warning: {wrong_dists} ann distance values are different from brute-force values (total # of values={dists_count}, dists_thresh={dists_thresh})") + + self.assertLessEqual(wrong_dists, dists_thresh, msg=f"Error: {wrong_dists} ann distance values are different from brute-force values") + + +def test_space_main(self, space, dim): + + # Generating sample data + data = np.float32(np.random.random((self.num_elements, dim))) + test_data = np.float32(np.random.random((self.num_test_elements, dim))) + + # Declaring index + p = hnswlib.Index(space=space, dim=dim) # possible options are l2, cosine or ip + print(f"Running pickle tests for {p}") + + p.num_threads = self.num_threads # by default using all available cores + + p0 = pickle.loads(pickle.dumps(p)) ### pickle un-initialized Index + p.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M) + p0.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M) + + p.ef = self.ef + p0.ef = self.ef + + p1 = pickle.loads(pickle.dumps(p)) ### pickle Index before adding items + + ### add items to ann index p,p0,p1 + p.add_items(data) + p1.add_items(data) + p0.add_items(data) + + p2=pickle.loads(pickle.dumps(p)) ### pickle Index before adding items + + self.assertTrue(np.allclose(p.get_items(), p0.get_items()), "items for p and p0 must be same") + self.assertTrue(np.allclose(p0.get_items(), p1.get_items()), "items for p0 and p1 must be same") + self.assertTrue(np.allclose(p1.get_items(), p2.get_items()), "items for p1 and p2 must be same") + + ### Test if returned distances are same + l, d = p.knn_query(test_data, k=self.k) + l0, d0 = p0.knn_query(test_data, k=self.k) + l1, d1 = p1.knn_query(test_data, k=self.k) + l2, d2 = p2.knn_query(test_data, k=self.k) + + self.assertLessEqual(np.sum(((d-d0)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p and p0 must match") + self.assertLessEqual(np.sum(((d0-d1)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p0 and p1 must match") + self.assertLessEqual(np.sum(((d1-d2)**2.)>1e-3), self.dists_err_thresh, msg=f"knn distances returned by p1 and p2 must match") + + ### check if ann results match brute-force search + ### 
+    check_ann_results(self, space, data, test_data, self.k, l, d,
+                      err_thresh=self.label_err_thresh,
+                      total_thresh=self.item_err_thresh,
+                      dists_thresh=self.dists_err_thresh)
+
+    check_ann_results(self, space, data, test_data, self.k, l2, d2,
+                      err_thresh=self.label_err_thresh,
+                      total_thresh=self.item_err_thresh,
+                      dists_thresh=self.dists_err_thresh)
+
+    ### Check ef parameter value
+    self.assertEqual(p.ef, self.ef, "incorrect value of p.ef")
+    self.assertEqual(p0.ef, self.ef, "incorrect value of p0.ef")
+    self.assertEqual(p1.ef, self.ef, "incorrect value of p1.ef")
+    self.assertEqual(p2.ef, self.ef, "incorrect value of p2.ef")
+
+    ### Check M parameter value
+    self.assertEqual(p.M, self.M, "incorrect value of p.M")
+    self.assertEqual(p0.M, self.M, "incorrect value of p0.M")
+    self.assertEqual(p1.M, self.M, "incorrect value of p1.M")
+    self.assertEqual(p2.M, self.M, "incorrect value of p2.M")
+
+    ### Check ef_construction parameter value
+    self.assertEqual(p.ef_construction, self.ef_construction, "incorrect value of p.ef_construction")
+    self.assertEqual(p0.ef_construction, self.ef_construction, "incorrect value of p0.ef_construction")
+    self.assertEqual(p1.ef_construction, self.ef_construction, "incorrect value of p1.ef_construction")
+    self.assertEqual(p2.ef_construction, self.ef_construction, "incorrect value of p2.ef_construction")
+
+
+class PickleUnitTests(unittest.TestCase):
+
+    def setUp(self):
+
+        self.ef_construction = 725
+        self.M = 64
+        self.ef = 725
+
+        self.num_elements = 5000
+        self.num_test_elements = 200
+
+        self.num_threads = 4
+        self.k = 25
+
+        self.label_err_thresh = 5  ### max number of missing labels allowed per test item
+        self.item_err_thresh = 5  ### max number of items allowed with incorrect labels
+
+        self.dists_err_thresh = 50  ### for two distance matrices, d1 and d2, dists_err_thresh controls the max
+                                    ### number of value pairs that are allowed to differ between d1 and d2,
+                                    ### i.e., the number of values for which (d1-d2)**2 > 1e-3
+
+    def test_inner_product_space(self):
+        test_space_main(self, 'ip', 48)
+
+    def test_l2_space(self):
+        test_space_main(self, 'l2', 153)
+
+    def test_cosine_space(self):
+        test_space_main(self, 'cosine', 512)
diff --git a/python_bindings/tests/bindings_test_resize.py b/python_bindings/tests/bindings_test_resize.py
index 9411af64..3c4e3e4f 100644
--- a/python_bindings/tests/bindings_test_resize.py
+++ b/python_bindings/tests/bindings_test_resize.py
@@ -1,12 +1,15 @@
 import unittest
 
+import numpy as np
+
+import hnswlib
+
 
 class RandomSelfTestCase(unittest.TestCase):
     def testRandomSelf(self):
         for idx in range(16):
             print("\n**** Index resize test ****\n")
-            import hnswlib
-            import numpy as np
+            np.random.seed(idx)
 
             dim = 16
             num_elements = 10000
@@ -25,7 +28,7 @@ def testRandomSelf(self):
 
             # M - is tightly connected with internal dimensionality of the data
             # stronlgy affects the memory consumption
-            p.init_index(max_elements = num_elements//2, ef_construction = 100, M = 16)
+            p.init_index(max_elements=num_elements//2, ef_construction=100, M=16)
 
             # Controlling the recall by setting ef:
             # higher ef leads to better accuracy, but slower search
@@ -43,20 +46,18 @@ def testRandomSelf(self):
 
             # Query the elements for themselves and measure recall:
             labels, distances = p.knn_query(data1, k=1)
-            items=p.get_items(list(range(len(data1))))
+            items = p.get_items(list(range(len(data1))))
 
             # Check the recall:
-            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))),1.0,3)
+            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
 
             # Check that the returned element data is correct:
-            diff_with_gt_labels=np.max(np.abs(data1-items))
-            self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4)
+            diff_with_gt_labels = np.max(np.abs(data1-items))
+            self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
 
             print("Resizing the index")
             p.resize_index(num_elements)
-
-
             print("Adding the second batch of %d elements" % (len(data2)))
             p.add_items(data2)
@@ -65,18 +66,12 @@ def testRandomSelf(self):
             items=p.get_items(list(range(num_elements)))
 
             # Check the recall:
-            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))),1.0,3)
+            self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
 
             # Check that the returned element data is correct:
             diff_with_gt_labels=np.max(np.abs(data-items))
-            self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4)
+            self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4)
 
             # Checking that all labels are returned correcly:
             sorted_labels=sorted(p.get_ids_list())
-            self.assertEqual(np.sum(~np.asarray(sorted_labels)==np.asarray(range(num_elements))),0)
-
-
-
-
-if __name__ == "__main__":
-    unittest.main()
+            self.assertEqual(np.sum(np.asarray(sorted_labels) != np.asarray(range(num_elements))), 0)
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..15665f31
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,120 @@
+import os
+import sys
+
+import numpy as np
+import pybind11
+import setuptools
+from setuptools import Extension, setup
+from setuptools.command.build_ext import build_ext
+
+__version__ = '0.5.0'
+
+
+include_dirs = [
+    pybind11.get_include(),
+    np.get_include(),
+]
+
+# compatibility when run from within python_bindings
+bindings_dir = 'python_bindings'
+if bindings_dir in os.path.basename(os.getcwd()):
+    source_files = ['./bindings.cpp']
+    include_dirs.extend(['../hnswlib/'])
+else:
+    source_files = ['./python_bindings/bindings.cpp']
+    include_dirs.extend(['./hnswlib/'])
+
+
+libraries = []
+extra_objects = []
+
+
+ext_modules = [
+    Extension(
+        'hnswlib',
+        source_files,
+        include_dirs=include_dirs,
+        libraries=libraries,
+        language='c++',
+        extra_objects=extra_objects,
+    ),
+]
+
+
+# As of Python 3.6, CCompiler has a `has_flag` method.
+# cf http://bugs.python.org/issue26689
+def has_flag(compiler, flagname):
+    """Return a boolean indicating whether a flag name is supported on
+    the specified compiler.
+    """
+    import tempfile
+    with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
+        f.write('int main (int argc, char **argv) { return 0; }')
+        try:
+            compiler.compile([f.name], extra_postargs=[flagname])
+        except setuptools.distutils.errors.CompileError:
+            return False
+    return True
+
+
+def cpp_flag(compiler):
+    """Return the -std=c++[11/14] compiler flag.
+    C++14 is preferred over C++11 (when it is available).
+ """ + if has_flag(compiler, '-std=c++14'): + return '-std=c++14' + elif has_flag(compiler, '-std=c++11'): + return '-std=c++11' + else: + raise RuntimeError('Unsupported compiler -- at least C++11 support ' + 'is needed!') + + +class BuildExt(build_ext): + """A custom build extension for adding compiler-specific options.""" + c_opts = { + 'msvc': ['/EHsc', '/openmp', '/O2'], + 'unix': ['-O3', '-march=native'], # , '-w' + } + link_opts = { + 'unix': [], + 'msvc': [], + } + + if sys.platform == 'darwin': + c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + else: + c_opts['unix'].append("-fopenmp") + link_opts['unix'].extend(['-fopenmp', '-pthread']) + + def build_extensions(self): + ct = self.compiler.compiler_type + opts = self.c_opts.get(ct, []) + if ct == 'unix': + opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) + opts.append(cpp_flag(self.compiler)) + if has_flag(self.compiler, '-fvisibility=hidden'): + opts.append('-fvisibility=hidden') + elif ct == 'msvc': + opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) + + for ext in self.extensions: + ext.extra_compile_args.extend(opts) + ext.extra_link_args.extend(self.link_opts.get(ct, [])) + + build_ext.build_extensions(self) + + +setup( + name='hnswlib', + version=__version__, + description='hnswlib', + author='Yury Malkov and others', + url='https://github.com/yurymalkov/hnsw', + long_description="""hnsw""", + ext_modules=ext_modules, + install_requires=['numpy'], + cmdclass={'build_ext': BuildExt}, + zip_safe=False, +) diff --git a/sift_1b.cpp b/sift_1b.cpp index 273c9828..2739490c 100644 --- a/sift_1b.cpp +++ b/sift_1b.cpp @@ -242,11 +242,11 @@ void sift_test1B() { size_t vecdim = 128; char path_index[1024]; char path_gt[1024]; - char *path_q = "bigann/bigann_query.bvecs"; - char *path_data = "bigann/bigann_base.bvecs"; + char *path_q = "../bigann/bigann_query.bvecs"; + char *path_data = "../bigann/bigann_base.bvecs"; sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M); - sprintf(path_gt, "bigann/gnd/idx_%dM.ivecs", subset_size_milllions); + sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions); unsigned char *massb = new unsigned char[vecdim];