-
Notifications
You must be signed in to change notification settings - Fork 152
/
plot.py
34 lines (24 loc) · 966 Bytes
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import multiprocessing
import numpy as np
import random
import sys
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Load the table from disk.
df = pd.read_csv('1_total_fee_w2v.csv')

# Values of the '1_total_fee' column, converted to strings; used later as
# the text labels drawn on the plot.
l = df['1_total_fee'].astype('str').tolist()

# Column labels of the frame, in order.
name = df.columns.tolist()
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    """Scatter-plot 2-D points, annotate each with its label, and save the figure.

    Args:
        low_dim_embs: Array indexed as ``low_dim_embs[i, :]``; each row is
            unpacked into an ``(x, y)`` pair. Must have at least
            ``len(labels)`` rows.
        labels: Sequence of annotation texts; ``labels[i]`` is drawn next to
            row ``i`` of ``low_dim_embs``.
        filename: Path the figure is written to via ``plt.savefig``.

    Raises:
        AssertionError: If there are more labels than embedding rows.
    """
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(10, 18))
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        # xytext must be given explicitly: with textcoords='offset points'
        # and no xytext, matplotlib falls back to xytext=xy, so the label
        # would be shifted by the point's own data coordinates (read as
        # points) instead of by a small constant offset.
        plt.annotate(
            label,
            xy=(x, y),
            xytext=(5, 2),
            textcoords='offset points',
            ha='right',
            va='bottom',
        )
    # Write the figure once, after every point has been drawn.
    plt.savefig(filename)
# Build the 2-D t-SNE reducer. Newer scikit-learn releases name the
# iteration-count parameter ``max_iter``; older ones only accept ``n_iter``.
# Try the new spelling first and fall back so the script runs on both.
try:
    tsne = TSNE(perplexity=30, n_components=2, init='pca', max_iter=5000)
except TypeError:
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)

# Upper bound on the number of rows that are embedded and plotted.
plot_only = 300

# Embed the first `plot_only` rows using every column except the first
# (presumably the first column is the label column -- confirm against the CSV).
low_dim_embs = tsne.fit_transform(df.iloc[:plot_only][name[1:]])

# Slice rather than index one by one: slicing truncates the same way
# `iloc[:plot_only]` does, so a file with fewer than `plot_only` rows no
# longer raises IndexError here.
labels = l[:plot_only]

plot_with_labels(low_dim_embs, labels)