forked from erikbern/ann-benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for result hdf file, and added big knn recall
- Loading branch information
1 parent
5bc8daf
commit 3b056fb
Showing
8 changed files
with
304 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,3 +19,4 @@ venv | |
.idea | ||
aerospike/data/*.hdf5 | ||
aerospike/data/* | ||
aerospike/results/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import numpy as np | ||
|
||
def compute_recall_without_distance_ties(true_ids, run_ids, count):
    """Return how many distinct ids appear in both ``true_ids`` and ``run_ids``.

    Distances are not consulted, so ties in the ground truth are ignored.
    ``count`` is accepted for signature parity with the tie-aware variant but
    is not used here; callers are expected to pass ``true_ids`` already
    trimmed to the number of neighbours they want to score.
    """
    ground_truth = set(true_ids)
    return len(ground_truth.intersection(run_ids))
|
||
def compute_recall_with_distance_ties(true_ids, true_dists, run_ids, count):
    """Count retrieved ids that hit the ground truth, accepting distance ties.

    The ground truth may hold more than ``count`` neighbours. Every entry
    past position ``count - 1`` whose distance is within 1e-6 of the distance
    at position ``count - 1`` is treated as an equally valid answer, so the
    accepted ground-truth prefix is extended up to the first entry that is no
    longer tied.

    This function assumes ``true_dists`` is monotonic, either increasing or
    decreasing.

    Args:
        true_ids: Ground-truth neighbour ids for one query, ordered like
            ``true_dists``.
        true_dists: Ground-truth distances for the same query.
        run_ids: Ids returned by the algorithm for the query.
        count: Number of neighbours being scored.

    Returns:
        A ``(recall, found_tie)`` tuple. ``recall`` is the number of distinct
        ids shared by ``run_ids`` and the accepted ground-truth prefix;
        ``found_tie`` is True when that prefix was extended beyond ``count``.
    """
    gt_size = np.shape(true_dists)[0]

    if gt_size <= count:
        # There is nothing beyond ``count`` that could tie. This also covers a
        # ground truth shorter than ``count``, where indexing
        # ``true_dists[count - 1]`` would raise IndexError.
        return len(set(true_ids[:count]) & set(run_ids)), False

    # Tie check is anchored at the last in-range ground-truth distance.
    dist_tie_check = true_dists[count - 1]

    # Default to "everything ties" and shrink at the first non-tied entry.
    set_end = gt_size
    for i in range(count, gt_size):
        is_close = abs(dist_tie_check - true_dists[i]) < 1e-6
        if not is_close:
            set_end = i
            break

    found_tie = set_end > count
    recall = len(set(true_ids[:set_end]) & set(run_ids))
    return recall, found_tie
|
||
def get_recall_values(true_nn, run_nn, count, count_ties=True):
    """Compute per-query recall and its summary statistics.

    Args:
        true_nn: Pair ``(true_ids, true_dists)`` of ground-truth ids and
            distances, indexed by query.
        run_nn: Ids returned by the algorithm, indexed by query.
        count: Number of neighbours being scored; also the divisor used to
            normalise the mean and standard deviation.
        count_ties: When True, score with the tie-aware recall. When False,
            trim the ground truth to ``count`` columns and score by plain set
            intersection.

    Returns:
        ``(mean / count, std / count, recalls, queries_with_ties)`` where
        ``recalls`` holds the raw (un-normalised) hit count of every query and
        ``queries_with_ties`` counts the queries whose ground truth was
        extended because of a distance tie.
    """
    true_ids, true_dists = true_nn
    if not count_ties:
        true_ids = true_ids[:, :count]
        assert true_ids.shape == run_nn.shape

    recalls = np.zeros(len(run_nn))
    queries_with_ties = 0
    # TODO probably not very efficient
    for query_idx, found_ids in enumerate(run_nn):
        if not count_ties:
            recalls[query_idx] = compute_recall_without_distance_ties(
                true_ids[query_idx], found_ids, count)
            continue

        recalls[query_idx], tied = compute_recall_with_distance_ties(
            true_ids[query_idx], true_dists[query_idx], found_ids, count)
        if tied:
            queries_with_ties += 1

    divisor = float(count)
    mean_recall = np.mean(recalls) / divisor
    std_recall = np.std(recalls) / divisor
    return (mean_recall, std_recall, recalls, queries_with_ties)
|
||
def knn(true_nn, run_nn, count, metrics):
    """Return the ``'knn'`` metrics group, computing and storing it if absent.

    Args:
        true_nn: Pair ``(true_ids, true_dists)`` of ground-truth ids and
            distances, forwarded to ``get_recall_values``.
        run_nn: Ids returned by the algorithm, indexed by query.
        count: Number of neighbours being scored.
        metrics: Container that supports ``in``, ``create_group`` and item
            access (presumably an h5py group -- confirm against the caller).

    Returns:
        ``metrics['knn']``. A freshly computed group has ``mean`` and ``std``
        attributes and a ``recalls`` entry.
    """
    if 'knn' in metrics:
        print("Found cached result")
        return metrics['knn']

    print('Computing knn metrics')
    # Compute before creating the group: if the computation raises, no empty
    # 'knn' group is left behind to be mistaken for a cached result later.
    mean, std, recalls, queries_with_ties = get_recall_values(true_nn, run_nn, count)
    if queries_with_ties > 0:
        print("Warning: %d/%d queries contained ties accounted for in recall" % (queries_with_ties, len(run_nn)))

    knn_metrics = metrics.create_group('knn')
    knn_metrics.attrs['mean'] = mean
    knn_metrics.attrs['std'] = std
    knn_metrics['recalls'] = recalls
    return metrics['knn']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
ann_benchmarks/algorithms/aerospike/configangulareuclidean.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Algorithm definitions for the `aerospike` entry: one list item under
# `angular` and one under `euclidean`, both below the top-level `float` key.
# Each item names the constructor, module and docker tag, and declares a
# single run group.
float: | ||
angular: | ||
- base_args: ['@metric', '@dimension'] | ||
constructor: Aerospike | ||
disabled: false | ||
docker_tag: ann-benchmarks-aerospike | ||
module: ann_benchmarks.algorithms.aerospike | ||
name: aerospike | ||
run_groups: | ||
# NOTE(review): this group passes lowercase `cosine` while the euclidean group
# passes upper-case `SQUARED_EUCLIDEAN` -- confirm both spellings are what the
# Aerospike constructor expects.
cosine: | ||
args: [ | ||
[cosine], | ||
# presumably index build/search parameters (m, ef_construction, ef) -- TODO confirm
[{m: 16, ef_construction: 100, ef: 100}] | ||
] | ||
# A single empty query-argument list is supplied.
query_args: [ | ||
[] | ||
] | ||
euclidean: | ||
- base_args: ['@metric', '@dimension'] | ||
constructor: Aerospike | ||
disabled: false | ||
docker_tag: ann-benchmarks-aerospike | ||
module: ann_benchmarks.algorithms.aerospike | ||
name: aerospike | ||
run_groups: | ||
SQUARED_EUCLIDEAN: | ||
args: [ | ||
[SQUARED_EUCLIDEAN], #Idx Type | ||
# same parameter values as the angular entry
[{m: 16, ef_construction: 100, ef: 100}] | ||
] | ||
query_args: [ | ||
[] | ||
] | ||
|