# 5_knnmlpr1.py

# -*- coding: utf-8 -*-
"""5.KNNMLPR1.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1nlGuDv0g1MaZVjw7ayqoazhaleTXNzKT

# **The k-nearest neighbors (KNN) algorithm**
- It is a **non-parametric, supervised learning** classifier, which uses proximity to make classifications or predictions about the grouping of an individual data point.
It is one of the most popular and **simplest** classification and regression algorithms.
- Choose a value of k, then find the k training points nearest to the query point using a distance metric such as Euclidean or Manhattan distance.
- In a regression problem: predict the average of the nearest neighbours' target values.

## **I.KNN CLASSIFIER:**
- JUNE2024
- PRACTICAL
"""

import numpy as np
import pandas as pd

# Load scikit-learn's built-in breast cancer dataset.
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

# The bare expressions below only display a value when run as notebook cells;
# they have no effect when this file is executed as a script.
data.data

data.feature_names

data.target

data.target_names

# Create a DataFrame: one column per feature plus a trailing 'target' column.
# The labels are passed as a FLAT list. Wrapping them in an extra list
# (columns=[[...]]) makes pandas build a MultiIndex whose labels are 1-tuples,
# which breaks plain label access such as df['target'] and prevents the column
# names from being usable as simple string feature names downstream.
# np.c_ stacks features and target into one float array, so the target is
# stored as 0.0 / 1.0.
df = pd.DataFrame(
    np.c_[data.data, data.target],
    columns=list(data.feature_names) + ['target'],
)
df.head()

df.shape

df.info()

"""## **SPLIT THEDATA:X,y**"""

# Separate predictors from the label: every column except the last one is a
# feature, the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

"""## **Split into Training and Test data**"""

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2020,
)

# Report the size of each partition.
for label, part in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'Shape of {label} = ', part.shape)

""" ## **Model Training KNeighborsClassifier**"""

from sklearn.neighbors import KNeighborsClassifier

# Baseline model using the 5 nearest neighbours. fit() returns the estimator
# itself, so construction and training are chained into one statement.
# NOTE(review): the features are used unscaled; KNN is distance based, so a
# scaler in front of the classifier may be worth evaluating.
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Mean accuracy on the held-out test split (only displayed in a notebook).
classifier.score(X_test, y_test)

# Predict cancer for a new patient (new data)

# 30 feature values for one patient, assumed to be listed in the same order as
# the training columns (they are matched to the columns by position below).
patient1 = [
    17.99, 10.38, 122.8, 1001.0, 0.1184,
    0.2776, 0.3001, 0.1471, 0.2419, 0.07871,
    1.095, 0.9053, 8.589, 153.4, 0.006399,
    0.04904, 0.05373, 0.01587, 0.03003, 0.006193,
    25.38, 17.33, 184.6, 2019.0, 0.1622,
    0.6656, 0.7119, 0.2654, 0.4601, 0.1189,
]

# Wrap the sample in a one-row DataFrame that reuses the training column
# labels. The classifier was fitted on a DataFrame, so predicting on a frame
# with the same columns keeps input and training data consistent (a bare
# ndarray carries no column labels).
patient1 = pd.DataFrame([patient1], columns=X_train.columns)

patient1

# Predict once and reuse the result instead of calling predict() twice.
pred = classifier.predict(patient1)  # cancer detection
pred

# This script treats label 0 as malignant and any other label as benign.
if pred[0] == 0:
    print('Patient has Cancer (malignant tumor)')
else:
    print('Patient has no Cancer (benign tumor)')

"""## **Evaluation metrics:**"""

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# Predicted labels for the held-out test set; reused by the metrics below.
y_pred = classifier.predict(X_test)

# Accuracy: share of test samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

"""- *classifier.score(X_test, y_test)*:
  - Predicts on the test data and computes the accuracy in one step, which keeps the evaluation simple and consistent.
- *accuracy_score(y_test, y_pred)*:
  - Calculates the accuracy from true and predicted labels that you supply, offering the flexibility to produce or adjust the predictions manually.
"""

# Precision, recall and F1-score, computed in that order on the same
# predictions.
# NOTE(review): no pos_label is passed, so scikit-learn's default positive
# class is used; confirm that this is the class the headline numbers should
# describe (this script treats label 0 as malignant).
precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score)
)
print(
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1-score: {f1:.2f}",
    sep="\n",
)

# Per-class precision / recall / F1 table.
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

# Counts of true vs. predicted labels.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

"""### **CONCLUSIONS:**
- For class 0.0, the precision is 90%, recall is 94%, and the F1-score is 92%. This means 90% of the samples predicted as class 0.0 truly belong to class 0.0, and the model recovered 94% of the actual class 0.0 samples.
- For class 1.0, the precision is 95%, recall is 92%, and the F1-score is 94%: 95% of the samples predicted as class 1.0 are correct, and 92% of the actual class 1.0 samples were found, indicating strong performance for this class as well.

## **Hyperparameter tuning :GridSearchCV**
"""

from sklearn.model_selection import GridSearchCV

# Search space: 5 neighbourhood sizes x 2 weighting schemes x 2 distance
# metrics, i.e. 20 candidate configurations.
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Base estimator handed to the search (default settings).
knn = KNeighborsClassifier()

# 5-fold cross-validated grid search on the training split, ranked by accuracy.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Best parameter combination found by the search.
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Evaluate the best estimator on the held-out test split.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Classification report and confusion matrix for the tuned model.
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

"""- After hyperparameter tuning:
- Improved Precision for Class 0.0: The model became better at reducing false positives for class 0.0.
- Improved Recall for Class 1.0: The model became better at identifying true positives for class 1.0.
"""


"""## **SAVE THE MODEL:**"""

import joblib

# Persist the baseline model (n_neighbors=5) trained earlier in this script.
joblib.dump(classifier, 'KNN_Breastcancermodel.pkl')

# Persist the tuned model. `knn` is only the template estimator passed to
# GridSearchCV and is never fitted in this script, so saving it would write an
# unusable (unfitted) model. The fitted winner of the search is `best_knn`.
joblib.dump(best_knn, 'KNN_Breastcancermodel2.pkl')