
Commit

add performance test for commits
yurymalkov committed Dec 9, 2021
1 parent cca297a commit bcbcb5d
Showing 2 changed files with 78 additions and 0 deletions.
16 changes: 16 additions & 0 deletions examples/git_tester.py
@@ -0,0 +1,16 @@
from pydriller import Repository
import os
import datetime
os.system("cp examples/speedtest.py examples/speedtest2.py")
for commit in Repository('.', from_tag="v0.5.2").traverse_commits():
    print(commit.hash)
    print(commit.msg)

    # Check out this commit, drop any stale build artifacts, and reinstall the library
    os.system(f"git checkout {commit.hash}; rm -rf build; ")
    os.system("python -m pip install .")
    # Benchmark several dimensionalities and thread counts, labeling each run with the commit message
    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 4 -t 1')
    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 64 -t 1')
    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 128 -t 1')
    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 4 -t 24')
    os.system(f'python examples/speedtest2.py -n "{commit.msg}" -d 128 -t 24')

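For context, the tester above relies on PyDriller's Repository API to walk the history since tag v0.5.2. A minimal dry-run sketch (not part of this commit; it only uses the same Repository call plus PyDriller's documented Commit attributes) that lists the commits git_tester.py would benchmark, without checking anything out or rebuilding:

from pydriller import Repository

# Dry run: list the commits since tag v0.5.2 that git_tester.py would benchmark.
for commit in Repository('.', from_tag="v0.5.2").traverse_commits():
    print(commit.hash[:8], commit.committer_date, commit.msg)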
62 changes: 62 additions & 0 deletions examples/speedtest.py
@@ -0,0 +1,62 @@
import hnswlib
import numpy as np
import os.path
import time
import argparse

# Command-line options: -d vector dimensionality, -n run label (e.g. the commit message), -t number of query threads.
ap = argparse.ArgumentParser()
ap.add_argument('-d')
ap.add_argument('-n')
ap.add_argument('-t')
args = ap.parse_args()
dim = int(args.d)
name = args.n
threads = int(args.t)
# Keep the total amount of vector data roughly constant across dimensionalities
num_elements = 1000000 * 4 // dim

# Generating sample data
np.random.seed(1)
data = np.float32(np.random.random((num_elements, dim)))


index_path = f'speed_index{dim}.bin'
# Declaring index
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip

if not os.path.isfile(index_path):

    p.init_index(max_elements=num_elements, ef_construction=100, M=16)

    # Controlling the recall by setting ef:
    # higher ef leads to better accuracy, but slower search
    p.set_ef(10)

    # Set number of threads used during batch search/construction
    # By default using all available cores
    p.set_num_threads(12)

    p.add_items(data)

    # Serializing and deleting the index:

    print("Saving index to '%s'" % index_path)
    p.save_index(index_path)

p.set_num_threads(threads)
times = []
time.sleep(10)
p.set_ef(100)
for _ in range(3):
    p.load_index(index_path)
    for _ in range(10):
        t0 = time.time()
        labels, distances = p.knn_query(data, k=1)
        tt = time.time() - t0
        times.append(tt)
        print(f"{tt} seconds")
str_out = f"mean time:{np.mean(times)}, median time:{np.median(times)}, std time {np.std(times)} {name}"
print(str_out)
with open(f"log_{dim}_t{threads}.txt", "a") as f:
    f.write(str_out + "\n")
    f.flush()

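Each run of speedtest.py appends one summary line to log_{dim}_t{threads}.txt. A hypothetical helper (not part of this commit; the file name and regular expression simply mirror the str_out format written above) to summarize one of those logs per run label:

import re

# Summarize one of the log files written by speedtest.py, e.g. log_4_t1.txt
# (dimension 4, 1 query thread). Each line looks like:
#   mean time:<mean>, median time:<median>, std time <std> <run name>
pattern = re.compile(r"mean time:(\S+), median time:(\S+), std time (\S+) (.*)")
with open("log_4_t1.txt") as f:
    for line in f:
        m = pattern.match(line.strip())
        if m:
            mean, median, std, name = m.groups()
            print(f"{name}: mean={float(mean):.4f}s, median={float(median):.4f}s")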