-
Notifications
You must be signed in to change notification settings - Fork 3
/
PCA_plot_for_dataset.py
93 lines (59 loc) · 1.92 KB
/
PCA_plot_for_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 23 08:56:39 2022
@author: rbouman
"""
import os
from numpy.linalg import svd
from sklearn.preprocessing import StandardScaler
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# --- Directory layout and run selection --------------------------------------
pickle_dir = "formatted_data"  # folder the dataset pickle is read from
base_result_dir = "results"  # root folder of the stored results
result_dir = "result_dir"  # not used in the visible part of this script
csvresult_dir = "csvresult_dir"  # not used in the visible part of this script
score_dir = "score_dir"  # sub-folder of base_result_dir holding score files
log_dir = "logs"  # not used in the visible part of this script

method_name = "EIF"  # selects the score sub-folder that is averaged
dataset_name = "musk"  # selects both the pickle file and the score sub-folder

# --- Load the dataset ---------------------------------------------------------
picklefile_name = dataset_name + ".pickle"
full_path_filename = os.path.join(pickle_dir, picklefile_name)
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at dataset pickles from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open(full_path_filename, "rb") as pickle_file:
    data = pickle.load(pickle_file)
# The loaded object is indexed with the keys "X" and "y"; y is squeezed to
# drop singleton dimensions so it can be compared element-wise later on.
X, y = data["X"], np.squeeze(data["y"])
# --- Average the stored scores over every file in the score folder -----------
score_folder_path = os.path.join(base_result_dir, score_dir, dataset_name, method_name)
hyperparameter_scores = os.listdir(score_folder_path)
n_scores = len(hyperparameter_scores)
if n_scores == 0:
    # Without this guard the mean below is 0/0 and every score becomes NaN.
    raise FileNotFoundError("no score files found in " + score_folder_path)

score_sums = np.zeros(y.shape)
for hyperparameter_score in hyperparameter_scores:
    full_path_filename = os.path.join(score_folder_path, hyperparameter_score)
    # Each file is read as a header-less CSV whose column is named "scores".
    # Converting to a plain array keeps the accumulation purely positional.
    file_scores = pd.read_csv(full_path_filename, names=["scores"])["scores"]
    score_sums += file_scores.to_numpy()
scores = score_sums / n_scores
# --- PCA through an SVD of the standardized data ------------------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Rows of Vt are the right singular vectors of the standardized data;
# transposing puts one principal direction in each column of V.
_, S, Vt = svd(X_scaled, full_matrices=False)
V = Vt.T
# Fraction of the total squared singular values carried by each component.
var_explained = S**2 / np.sum(S**2)
# Project the *standardized* data: V and var_explained were derived from
# X_scaled, so projecting the raw X would plot coordinates that do not
# correspond to the reported explained variance.
X_PCA = X_scaled.dot(V)
#%% make plots
# Axis labels are shared by both figures, so build them once.
pc1_label = "PC1 " + str(var_explained[0]*100) + "% var explained"
pc2_label = "PC2 " + str(var_explained[1]*100) + "% var explained"

# Figure 1: first two principal components, colored by the label in y
# (y == 0 plotted as "normal", y == 1 as "outlier").
plt.figure()
plt.title("class colored plot: ")
plt.scatter(X_PCA[y==0,0], X_PCA[y==0,1], label="normal")
plt.scatter(X_PCA[y==1,0], X_PCA[y==1,1], label="outlier")
plt.xlabel(pc1_label)
plt.ylabel(pc2_label)
plt.legend()
# No extra plt.figure() before show(): it only opened an empty window, and
# the bare `plt.title` that followed it was an attribute access, not a call.
plt.show()

# Figure 2: same projection, colored by the averaged score.
plt.figure()
plt.title("score colored plot")
plt.scatter(X_PCA[:,0], X_PCA[:,1], c=scores)
plt.xlabel(pc1_label)
plt.ylabel(pc2_label)
plt.colorbar()
plt.show()