# exp3_knn.py

# -*- coding: utf-8 -*-
"""exp3-KNN.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Ls1XcNCgeuOhXKECEKYrst7Cuz-Pv5tK
"""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Column names for the headerless `wdbc.data` file: the diagnosis label
# followed by ten measurements repeated in three groups (suffixes 1, 2, 3).
# The names are generated rather than hand-typed so the three groups stay
# consistent (the hand-typed list had 'compactness' without its group suffix
# and 'fractal_dimenstion1'/'fractal_dimenstion2' misspelled).
feature_stems = [
    'radius', 'texture', 'perimeter', 'area', 'smoothness',
    'compactness', 'concavity', 'concave_points', 'symmetry',
    'fractal_dimension',
]
cols = ['Diagnosis'] + [
    f'{stem}{group}' for group in (1, 2, 3) for stem in feature_stems
]

# NOTE(review): the UCI file is expected to carry a leading sample-ID field,
# i.e. one more field per row than there are names here; pandas then uses that
# extra leading field as the row index. Confirm against the actual data file.
df = pd.read_csv('wdbc.data', names=cols)
print(df.head(10))

#Pair of two features. each pair in X and Y respectively
#Scatter Plot
#Digraph

#Dropping unnecessary Columns
#df.drop(['Unnamed:32', 'id'], axis=1, inplace=True)

def diagnosis_value(Diagnosis):
    """Encode a diagnosis label as an integer flag.

    Returns 1 when ``Diagnosis`` compares equal to the string ``'M'`` and
    0 for every other value.
    """
    if Diagnosis == 'M':
        return 1
    return 0

# Map the raw labels to 1/0 via diagnosis_value, then keep them as
# integer category codes.
df['Diagnosis'] = (
    df['Diagnosis']
    .apply(diagnosis_value)
    .astype('category')
    .cat.codes
)

# Regression/scatter plot of the first two features, coloured by diagnosis.
sns.lmplot(data=df, x='radius1', y='texture1', hue='Diagnosis')
plt.title("Scatterplot: Radius Mean vs. Texture Mean")
plt.show()

# Feature matrix is every column after the first; target is 'Diagnosis'.
X = np.array(df.iloc[:, 1:])
y = np.array(df['Diagnosis'])

# Hold out a third of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Commented out IPython magic to ensure Python compatibility.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

plt.rcParams["figure.figsize"] = [7.00, 3.50]
plt.rcParams["figure.autolayout"] = True

n_neighbors = 15

# `df` is a pandas DataFrame, not a scikit-learn Bunch, so it has no
# `.data`, `.target`, `.target_names` or `.feature_names` attributes.
# Take the equivalent pieces from its columns instead.
feature_names = ['radius1', 'texture1']  # the first two feature columns
# Display labels indexed by the encoded diagnosis: diagnosis_value maps 'M' to 1
# and everything else to 0 (assumed to be the benign class; confirm).
target_names = ['Benign', 'Malignant']
X = df[feature_names].to_numpy()
y = df['Diagnosis'].to_numpy()
h = .02  # step size of the prediction mesh

# Two classes, so two colours: light for regions, bold for the points.
cmap_light = ListedColormap(['orange', 'cornflowerblue'])
cmap_bold = ['darkorange', 'darkblue']


clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
clf.fit(X, y)

# Predict the class of every mesh point covering the data range (plus a
# margin of 1 on each side) to draw the decision regions.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.figure()

plt.contourf(xx, yy, Z, cmap=cmap_light)

# A dict palette plus an explicit hue order ties each class to a fixed colour
# regardless of which label happens to appear first in the data.
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=np.array(target_names)[y],
                hue_order=target_names,
                palette=dict(zip(target_names, cmap_bold)),
                alpha=1.0, edgecolor="black")

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

# The format arguments were commented out, leaving the call unclosed.
plt.title("2-Class classification (k = %i, weights = '%s')"
          % (n_neighbors, 'uniform'))

# Axis labels must match the plotted columns (X[:, 0] and X[:, 1]).
plt.xlabel(feature_names[0])
plt.ylabel(feature_names[1])

plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_breast_cancer
from matplotlib.colors import ListedColormap

# Use the Breast Cancer Wisconsin data already loaded into `df`.
# `load_breast_cancer()` would return a Bunch exposing `.data`, `.target` and
# `.feature_names`; `df` is a DataFrame, so select the columns explicitly.
#data = load_breast_cancer()
data = df
# Column 0 is 'Diagnosis'; the next two columns are the first two features.
feature_names = list(data.columns[1:3])
X = data[feature_names].to_numpy()  # two features only, for a 2-D plot
y = data['Diagnosis'].to_numpy()

# Create a KNN classifier
n_neighbors = 15
clf = KNeighborsClassifier(n_neighbors, weights='uniform')
clf.fit(X, y)

# Generate a mesh grid for plotting
h = .02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Colormaps for the plot: light shades for the regions, darker shades for
# the points so they stay distinguishable from the background.
cmap_light = ListedColormap(['orange', 'cyan'])
cmap_bold = ListedColormap(['darkorange', 'c'])

# Plot the KNN decision regions
plt.figure(figsize=(7, 3.5))
plt.contourf(xx, yy, Z, cmap=cmap_light, alpha=0.8)

# Scatter plot of data points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor="black")
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

# The target has two classes (encoded 0/1), not three.
plt.title(f"2-Class classification (k = {n_neighbors}, 'uniform')")
plt.xlabel(feature_names[0])
plt.ylabel(feature_names[1])

plt.show()

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.spatial import Voronoi, voronoi_plot_2d
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

# Fit a KNN classifier on the training split and predict the held-out split.
# `y_pred` was never defined before, so the metrics below raised NameError.
knn = KNeighborsClassifier(n_neighbors=15, weights='uniform')
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Encode 'Diagnosis' only if it still holds raw labels. Re-applying
# diagnosis_value to an already encoded 0/1 column would map every row to 0,
# because no integer compares equal to 'M'.
if not pd.api.types.is_numeric_dtype(df['Diagnosis']):
    df['Diagnosis'] = df['Diagnosis'].apply(diagnosis_value)
    df['Diagnosis'] = df['Diagnosis'].astype('category').cat.codes

# Scatterplot of 'radius2' vs. 'texture2' with different colors for diagnosis.
# The title is built from the column names so it cannot drift from the data
# actually plotted.
x_col, y_col = 'radius2', 'texture2'
sns.lmplot(x=x_col, y=y_col, hue='Diagnosis', data=df)
plt.title(f"Scatterplot: {x_col} vs. {y_col}")

# Compute the Voronoi diagram of the same two features
vor = Voronoi(df[[x_col, y_col]].to_numpy())

# Plot the Voronoi regions (no `ax` is passed here, so this is not drawn onto
# the lmplot axes explicitly)
fig = voronoi_plot_2d(vor, show_vertices=False, line_colors='orange', line_width=2, line_alpha=0.6, point_size=2)

plt.show()