-
Notifications
You must be signed in to change notification settings - Fork 0
/
eigenvalue_analysis.py
49 lines (37 loc) · 1.18 KB
/
eigenvalue_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import sqlite3
import numpy as np
import networkx as nx
from numpy import linalg as LA
# Threshold constant; nothing in the visible code reads it.
# NOTE(review): presumably a similarity cutoff meant for build_graph's edge
# construction -- confirm against the full implementation.
SIM_THRESHOLD = 0.4
def get_words_from_db():
    """Fetch up to 300 rows of ``all_words_count`` flagged ``in_nasari=1``.

    Opens the SQLite database file ``20news-18828.db`` (path relative to the
    current working directory), runs the query and closes the connection
    before returning.

    Returns:
        list: Row tuples as produced by ``cursor.fetchall()``. Every column
        of the table is included because the query selects ``*``. No
        ``ORDER BY`` is given, so which matching rows are returned is up to
        SQLite.
    """
    conn = sqlite3.connect('20news-18828.db')
    try:
        cursor = conn.cursor()
        # WHERE must precede LIMIT; the reverse order is rejected by SQLite
        # with a syntax error.
        cursor.execute(
            'SELECT * FROM all_words_count WHERE in_nasari=1 LIMIT 300')
        return cursor.fetchall()
    finally:
        # Release the database handle even when the query raises.
        conn.close()
# NOTE(review): no body for build_graph is visible in this view. main() passes
# the rows from get_words_from_db() in and hands the result to
# nx.normalized_laplacian_matrix, so it is expected to return a networkx
# graph -- the implementation needs to be restored/confirmed.
def build_graph(full_dicts):
def find_eig_gap(eig_values, num):
    """Return the ``num`` largest gaps between consecutive eigenvalues.

    A gap at position ``i`` is ``eig_values[i] - eig_values[i - 1]``, so
    positions run from 1 to ``len(eig_values) - 1``.

    Args:
        eig_values: Indexable sequence of numbers supporting ``len()``.
            The gaps are taken in the order given; no sorting is done here.
        num: Maximum number of gaps to keep.

    Returns:
        dict: Maps gap position to gap size for the retained gaps. Empty
        when there are fewer than two values or when ``num`` is not
        positive.
    """
    # Without this guard the replacement branch below would call min() on an
    # empty list and raise ValueError.
    if num <= 0:
        return {}

    diffs = []
    pos = []
    for i in range(1, len(eig_values)):
        diff = eig_values[i] - eig_values[i - 1]
        if len(diffs) < num:
            # Still filling the initial window of candidates.
            diffs.append(diff)
            pos.append(i)
            continue
        smallest = min(diffs)
        # Strict comparison: a gap that merely ties the current minimum does
        # not displace it.
        if diff > smallest:
            # index() finds the earliest-stored minimum, so among tied
            # minima the oldest one is evicted first.
            to_rm = diffs.index(smallest)
            diffs.pop(to_rm)
            pos.pop(to_rm)
            diffs.append(diff)
            pos.append(i)
    # Returning after the loop guarantees a dict even for empty input.
    return dict(zip(pos, diffs))
def main():
    """Print the largest eigenvalue gaps of the word graph's Laplacian.

    Loads rows via ``get_words_from_db()``, turns them into a graph with
    ``build_graph()``, computes the eigenvalues of the graph's normalized
    Laplacian and prints the five largest consecutive gaps as returned by
    ``find_eig_gap()``.
    """
    full_word_list = get_words_from_db()
    G = build_graph(full_word_list)
    laplacian = nx.normalized_laplacian_matrix(G).todense()
    # Only the eigenvalues are needed; the eigenvectors are discarded.
    eig_values, _ = LA.eigh(laplacian)
    num_of_top_eig_values = 5
    eig_gaps = find_eig_gap(eig_values, num_of_top_eig_values)
    # Function-call form is valid on both Python 2 and Python 3.
    print(eig_gaps)
# Run the analysis only when executed as a script, so that importing this
# module (e.g. to reuse a helper) does not kick off the whole pipeline.
if __name__ == "__main__":
    main()