Download our e-book of Introduction To Python
Shashank Shanu
3 years ago
# import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set()
# Load the breast-cancer dataset and build the independent (X) and
# dependent (y) variables.
breast_cancer = load_breast_cancer()
X = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)
# Keep only the two features that are plotted later on.
X = X[['mean area', 'mean compactness']]
# Map the integer target codes back to their class names.
y = pd.Categorical.from_codes(breast_cancer.target, breast_cancer.target_names)
# One-hot encode the labels and drop the first category so that a single
# indicator column remains (the 'benign' column used as the plot hue below).
# Note: the keyword value must be the Python literal `True`; lowercase
# `true` is an undefined name and raises NameError.
y = pd.get_dummies(y, drop_first=True)
# Split the dataset into training and testing sets; the fixed random_state
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# 5-nearest-neighbours classifier using the Euclidean distance.
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
# y_train is a single-column DataFrame; flatten it to 1-D so scikit-learn
# does not emit a DataConversionWarning about a column-vector target.
knn.fit(X_train, np.ravel(y_train))
# Predict the class of every sample in the held-out test set.
y_pred = knn.predict(X_test)
# Plot the ground-truth labels of the test set.
# Each plot gets its own figure: without this, running the file as a script
# draws both scatter plots onto the same implicit axes, so the predictions
# end up painted over the true labels.
plt.figure()
sns.scatterplot(
    x='mean area',
    y='mean compactness',
    hue='benign',
    # Attach the label column to the test features so it can be used as hue.
    data=X_test.join(y_test, how='outer'),
)
plt.title('Test set: actual labels')

# Plot the same test points coloured by the classifier's predictions.
plt.figure()
plt.scatter(
    X_test['mean area'],
    X_test['mean compactness'],
    c=y_pred,
    cmap='coolwarm',
    alpha=0.7,
)
plt.xlabel('mean area')
plt.ylabel('mean compactness')
plt.title('Test set: predicted labels')

# Display the figures when run as a plain script (harmless in a notebook).
plt.show()
# Evaluate the classifier on the test set.
# The results are printed explicitly: a bare expression only displays its
# value inside a notebook cell and is silently discarded in a script.
# Rows of the confusion matrix are the true classes, columns the predicted
# classes.
print(confusion_matrix(y_test, y_pred))
# Notebook output recorded by the author:
# array([[45, 9],
# [ 3, 86]], dtype=int64)

# Fraction of test samples that were classified correctly.
print(accuracy_score(y_test, y_pred))
# Notebook output recorded by the author:
# 0.916083916083916