-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_nn.py
46 lines (35 loc) · 1.36 KB
/
compute_nn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import faiss
import numpy as np
from tqdm import tqdm
# Input/output locations and search settings for the training split.
IMAGE_EMBEDDINGS_PATH = "embeddings/image_embeddings_train.npy"
INDEX_PATH = "embeddings/IVFFlatText_train.faiss"
OUTPUT_PATH = "embeddings/nn_train.npy"
NPROBE = 128
NUM_NEIGHBORS = 5
BATCH_SIZE = 10000


def drop_own_id(candidate_ids, query_ids):
    """Remove each query's own id from its row of search results.

    Args:
        candidate_ids: array-like of shape (n, k + 1); row r holds the ids
            returned for query r, best match first.
        query_ids: array-like of shape (n,); the *global* id of each query
            row (not its position inside the current batch).

    Returns:
        Integer array of shape (n, k). When a row contains its query's own
        id, that entry is removed and the remaining order is preserved;
        otherwise the last (worst-ranked) candidate is dropped, which is the
        same as keeping the first k.
    """
    candidate_ids = np.asarray(candidate_ids)
    query_ids = np.asarray(query_ids)
    n_rows, width = candidate_ids.shape

    is_own = candidate_ids == query_ids[:, None]
    # Column to discard per row: the own id if present, else the last one.
    drop_col = np.where(is_own.any(axis=1), is_own.argmax(axis=1), width - 1)

    keep = np.ones((n_rows, width), dtype=bool)
    keep[np.arange(n_rows), drop_col] = False
    # Explicit width (rather than -1) so an empty batch reshapes cleanly.
    return candidate_ids[keep].reshape(n_rows, width - 1)


def main():
    """Search the index for every image embedding and save the neighbour ids.

    For each row of the image-embedding matrix, the NUM_NEIGHBORS closest
    index entries are stored, excluding the entry whose id equals the row
    number. Presumably index entry j is the caption paired with image j, so
    this excludes the image's own caption -- TODO confirm against the code
    that built the index.
    """
    img_embeds = np.load(IMAGE_EMBEDDINGS_PATH)

    index = faiss.read_index(INDEX_PATH)
    index.nprobe = NPROBE

    total = len(img_embeds)
    nearest_neighbors = np.zeros((total, NUM_NEIGHBORS), dtype=int)

    with tqdm(total=total) as progress:
        # Slicing clamps at the end of the array, so the final partial batch
        # needs no special handling.
        for start in range(0, total, BATCH_SIZE):
            query = img_embeds[start:start + BATCH_SIZE]
            stop = start + len(query)

            # Ask for one extra result so that NUM_NEIGHBORS remain after the
            # query's own id is removed.
            # NOTE(review): faiss pads with -1 when it finds fewer results
            # than requested; such ids are stored as-is, as before.
            _, neighbor_ids = index.search(query, NUM_NEIGHBORS + 1)

            # Ids returned by the index are global, so compare against the
            # global row numbers of this batch, not the in-batch positions.
            nearest_neighbors[start:stop] = drop_own_id(
                neighbor_ids, np.arange(start, stop)
            )
            progress.update(len(query))

    with open(OUTPUT_PATH, "wb") as f:
        np.save(f, nearest_neighbors, allow_pickle=False)


if __name__ == "__main__":
    main()