diff --git a/week 2/priyatam75_week2.ipynb b/week 2/priyatam75_week2.ipynb
new file mode 100644
index 0000000..a111c45
--- /dev/null
+++ b/week 2/priyatam75_week2.ipynb
@@ -0,0 +1,641 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# WEEK-2"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "Learn about Counter"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Counter is a class in Python's collections module, generally used to count hashable objects. It is a dict subclass: when it is constructed from an iterable, it automatically builds a hash table mapping each distinct element to the number of times it occurs.\n",
+ "The most_common() method returns the elements with the highest counts, ordered from most to least frequent.\n"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "Implementing with Iris Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The dataset has been downloaded from Kaggle."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from collections import Counter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " SepalLengthCm | \n",
+ " SepalWidthCm | \n",
+ " PetalLengthCm | \n",
+ " PetalWidthCm | \n",
+ " Species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 4.9 | \n",
+ " 3.0 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 4.7 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 4.6 | \n",
+ " 3.1 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n",
+ "0 1 5.1 3.5 1.4 0.2 Iris-setosa\n",
+ "1 2 4.9 3.0 1.4 0.2 Iris-setosa\n",
+ "2 3 4.7 3.2 1.3 0.2 Iris-setosa\n",
+ "3 4 4.6 3.1 1.5 0.2 Iris-setosa\n",
+ "4 5 5.0 3.6 1.4 0.2 Iris-setosa"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read the Iris CSV from the notebook's working directory and preview the first rows.\n",
+ "data = pd.read_csv('Iris.csv')\n",
+ "data.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Split into features X and labels Y.\n",
+ "# Column 0 is Id, a row counter rather than a measurement. The rows are ordered by\n",
+ "# species, so Id alone reveals the label; keeping it as a feature would leak the\n",
+ "# answer into every distance computation. Use only the four measurement columns.\n",
+ "X = data.iloc[:, 1:5].values\n",
+ "# Column 5 is Species, the class label.\n",
+ "Y = data.iloc[:, 5].values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[1.00e+00 5.10e+00 3.50e+00 1.40e+00 2.00e-01]\n",
+ " [2.00e+00 4.90e+00 3.00e+00 1.40e+00 2.00e-01]\n",
+ " [3.00e+00 4.70e+00 3.20e+00 1.30e+00 2.00e-01]\n",
+ " [4.00e+00 4.60e+00 3.10e+00 1.50e+00 2.00e-01]\n",
+ " [5.00e+00 5.00e+00 3.60e+00 1.40e+00 2.00e-01]\n",
+ " [6.00e+00 5.40e+00 3.90e+00 1.70e+00 4.00e-01]\n",
+ " [7.00e+00 4.60e+00 3.40e+00 1.40e+00 3.00e-01]\n",
+ " [8.00e+00 5.00e+00 3.40e+00 1.50e+00 2.00e-01]\n",
+ " [9.00e+00 4.40e+00 2.90e+00 1.40e+00 2.00e-01]\n",
+ " [1.00e+01 4.90e+00 3.10e+00 1.50e+00 1.00e-01]\n",
+ " [1.10e+01 5.40e+00 3.70e+00 1.50e+00 2.00e-01]\n",
+ " [1.20e+01 4.80e+00 3.40e+00 1.60e+00 2.00e-01]\n",
+ " [1.30e+01 4.80e+00 3.00e+00 1.40e+00 1.00e-01]\n",
+ " [1.40e+01 4.30e+00 3.00e+00 1.10e+00 1.00e-01]\n",
+ " [1.50e+01 5.80e+00 4.00e+00 1.20e+00 2.00e-01]\n",
+ " [1.60e+01 5.70e+00 4.40e+00 1.50e+00 4.00e-01]\n",
+ " [1.70e+01 5.40e+00 3.90e+00 1.30e+00 4.00e-01]\n",
+ " [1.80e+01 5.10e+00 3.50e+00 1.40e+00 3.00e-01]\n",
+ " [1.90e+01 5.70e+00 3.80e+00 1.70e+00 3.00e-01]\n",
+ " [2.00e+01 5.10e+00 3.80e+00 1.50e+00 3.00e-01]\n",
+ " [2.10e+01 5.40e+00 3.40e+00 1.70e+00 2.00e-01]\n",
+ " [2.20e+01 5.10e+00 3.70e+00 1.50e+00 4.00e-01]\n",
+ " [2.30e+01 4.60e+00 3.60e+00 1.00e+00 2.00e-01]\n",
+ " [2.40e+01 5.10e+00 3.30e+00 1.70e+00 5.00e-01]\n",
+ " [2.50e+01 4.80e+00 3.40e+00 1.90e+00 2.00e-01]\n",
+ " [2.60e+01 5.00e+00 3.00e+00 1.60e+00 2.00e-01]\n",
+ " [2.70e+01 5.00e+00 3.40e+00 1.60e+00 4.00e-01]\n",
+ " [2.80e+01 5.20e+00 3.50e+00 1.50e+00 2.00e-01]\n",
+ " [2.90e+01 5.20e+00 3.40e+00 1.40e+00 2.00e-01]\n",
+ " [3.00e+01 4.70e+00 3.20e+00 1.60e+00 2.00e-01]\n",
+ " [3.10e+01 4.80e+00 3.10e+00 1.60e+00 2.00e-01]\n",
+ " [3.20e+01 5.40e+00 3.40e+00 1.50e+00 4.00e-01]\n",
+ " [3.30e+01 5.20e+00 4.10e+00 1.50e+00 1.00e-01]\n",
+ " [3.40e+01 5.50e+00 4.20e+00 1.40e+00 2.00e-01]\n",
+ " [3.50e+01 4.90e+00 3.10e+00 1.50e+00 1.00e-01]\n",
+ " [3.60e+01 5.00e+00 3.20e+00 1.20e+00 2.00e-01]\n",
+ " [3.70e+01 5.50e+00 3.50e+00 1.30e+00 2.00e-01]\n",
+ " [3.80e+01 4.90e+00 3.10e+00 1.50e+00 1.00e-01]\n",
+ " [3.90e+01 4.40e+00 3.00e+00 1.30e+00 2.00e-01]\n",
+ " [4.00e+01 5.10e+00 3.40e+00 1.50e+00 2.00e-01]\n",
+ " [4.10e+01 5.00e+00 3.50e+00 1.30e+00 3.00e-01]\n",
+ " [4.20e+01 4.50e+00 2.30e+00 1.30e+00 3.00e-01]\n",
+ " [4.30e+01 4.40e+00 3.20e+00 1.30e+00 2.00e-01]\n",
+ " [4.40e+01 5.00e+00 3.50e+00 1.60e+00 6.00e-01]\n",
+ " [4.50e+01 5.10e+00 3.80e+00 1.90e+00 4.00e-01]\n",
+ " [4.60e+01 4.80e+00 3.00e+00 1.40e+00 3.00e-01]\n",
+ " [4.70e+01 5.10e+00 3.80e+00 1.60e+00 2.00e-01]\n",
+ " [4.80e+01 4.60e+00 3.20e+00 1.40e+00 2.00e-01]\n",
+ " [4.90e+01 5.30e+00 3.70e+00 1.50e+00 2.00e-01]\n",
+ " [5.00e+01 5.00e+00 3.30e+00 1.40e+00 2.00e-01]\n",
+ " [5.10e+01 7.00e+00 3.20e+00 4.70e+00 1.40e+00]\n",
+ " [5.20e+01 6.40e+00 3.20e+00 4.50e+00 1.50e+00]\n",
+ " [5.30e+01 6.90e+00 3.10e+00 4.90e+00 1.50e+00]\n",
+ " [5.40e+01 5.50e+00 2.30e+00 4.00e+00 1.30e+00]\n",
+ " [5.50e+01 6.50e+00 2.80e+00 4.60e+00 1.50e+00]\n",
+ " [5.60e+01 5.70e+00 2.80e+00 4.50e+00 1.30e+00]\n",
+ " [5.70e+01 6.30e+00 3.30e+00 4.70e+00 1.60e+00]\n",
+ " [5.80e+01 4.90e+00 2.40e+00 3.30e+00 1.00e+00]\n",
+ " [5.90e+01 6.60e+00 2.90e+00 4.60e+00 1.30e+00]\n",
+ " [6.00e+01 5.20e+00 2.70e+00 3.90e+00 1.40e+00]\n",
+ " [6.10e+01 5.00e+00 2.00e+00 3.50e+00 1.00e+00]\n",
+ " [6.20e+01 5.90e+00 3.00e+00 4.20e+00 1.50e+00]\n",
+ " [6.30e+01 6.00e+00 2.20e+00 4.00e+00 1.00e+00]\n",
+ " [6.40e+01 6.10e+00 2.90e+00 4.70e+00 1.40e+00]\n",
+ " [6.50e+01 5.60e+00 2.90e+00 3.60e+00 1.30e+00]\n",
+ " [6.60e+01 6.70e+00 3.10e+00 4.40e+00 1.40e+00]\n",
+ " [6.70e+01 5.60e+00 3.00e+00 4.50e+00 1.50e+00]\n",
+ " [6.80e+01 5.80e+00 2.70e+00 4.10e+00 1.00e+00]\n",
+ " [6.90e+01 6.20e+00 2.20e+00 4.50e+00 1.50e+00]\n",
+ " [7.00e+01 5.60e+00 2.50e+00 3.90e+00 1.10e+00]\n",
+ " [7.10e+01 5.90e+00 3.20e+00 4.80e+00 1.80e+00]\n",
+ " [7.20e+01 6.10e+00 2.80e+00 4.00e+00 1.30e+00]\n",
+ " [7.30e+01 6.30e+00 2.50e+00 4.90e+00 1.50e+00]\n",
+ " [7.40e+01 6.10e+00 2.80e+00 4.70e+00 1.20e+00]\n",
+ " [7.50e+01 6.40e+00 2.90e+00 4.30e+00 1.30e+00]\n",
+ " [7.60e+01 6.60e+00 3.00e+00 4.40e+00 1.40e+00]\n",
+ " [7.70e+01 6.80e+00 2.80e+00 4.80e+00 1.40e+00]\n",
+ " [7.80e+01 6.70e+00 3.00e+00 5.00e+00 1.70e+00]\n",
+ " [7.90e+01 6.00e+00 2.90e+00 4.50e+00 1.50e+00]\n",
+ " [8.00e+01 5.70e+00 2.60e+00 3.50e+00 1.00e+00]\n",
+ " [8.10e+01 5.50e+00 2.40e+00 3.80e+00 1.10e+00]\n",
+ " [8.20e+01 5.50e+00 2.40e+00 3.70e+00 1.00e+00]\n",
+ " [8.30e+01 5.80e+00 2.70e+00 3.90e+00 1.20e+00]\n",
+ " [8.40e+01 6.00e+00 2.70e+00 5.10e+00 1.60e+00]\n",
+ " [8.50e+01 5.40e+00 3.00e+00 4.50e+00 1.50e+00]\n",
+ " [8.60e+01 6.00e+00 3.40e+00 4.50e+00 1.60e+00]\n",
+ " [8.70e+01 6.70e+00 3.10e+00 4.70e+00 1.50e+00]\n",
+ " [8.80e+01 6.30e+00 2.30e+00 4.40e+00 1.30e+00]\n",
+ " [8.90e+01 5.60e+00 3.00e+00 4.10e+00 1.30e+00]\n",
+ " [9.00e+01 5.50e+00 2.50e+00 4.00e+00 1.30e+00]\n",
+ " [9.10e+01 5.50e+00 2.60e+00 4.40e+00 1.20e+00]\n",
+ " [9.20e+01 6.10e+00 3.00e+00 4.60e+00 1.40e+00]\n",
+ " [9.30e+01 5.80e+00 2.60e+00 4.00e+00 1.20e+00]\n",
+ " [9.40e+01 5.00e+00 2.30e+00 3.30e+00 1.00e+00]\n",
+ " [9.50e+01 5.60e+00 2.70e+00 4.20e+00 1.30e+00]\n",
+ " [9.60e+01 5.70e+00 3.00e+00 4.20e+00 1.20e+00]\n",
+ " [9.70e+01 5.70e+00 2.90e+00 4.20e+00 1.30e+00]\n",
+ " [9.80e+01 6.20e+00 2.90e+00 4.30e+00 1.30e+00]\n",
+ " [9.90e+01 5.10e+00 2.50e+00 3.00e+00 1.10e+00]\n",
+ " [1.00e+02 5.70e+00 2.80e+00 4.10e+00 1.30e+00]\n",
+ " [1.01e+02 6.30e+00 3.30e+00 6.00e+00 2.50e+00]\n",
+ " [1.02e+02 5.80e+00 2.70e+00 5.10e+00 1.90e+00]\n",
+ " [1.03e+02 7.10e+00 3.00e+00 5.90e+00 2.10e+00]\n",
+ " [1.04e+02 6.30e+00 2.90e+00 5.60e+00 1.80e+00]\n",
+ " [1.05e+02 6.50e+00 3.00e+00 5.80e+00 2.20e+00]\n",
+ " [1.06e+02 7.60e+00 3.00e+00 6.60e+00 2.10e+00]\n",
+ " [1.07e+02 4.90e+00 2.50e+00 4.50e+00 1.70e+00]\n",
+ " [1.08e+02 7.30e+00 2.90e+00 6.30e+00 1.80e+00]\n",
+ " [1.09e+02 6.70e+00 2.50e+00 5.80e+00 1.80e+00]\n",
+ " [1.10e+02 7.20e+00 3.60e+00 6.10e+00 2.50e+00]\n",
+ " [1.11e+02 6.50e+00 3.20e+00 5.10e+00 2.00e+00]\n",
+ " [1.12e+02 6.40e+00 2.70e+00 5.30e+00 1.90e+00]\n",
+ " [1.13e+02 6.80e+00 3.00e+00 5.50e+00 2.10e+00]\n",
+ " [1.14e+02 5.70e+00 2.50e+00 5.00e+00 2.00e+00]\n",
+ " [1.15e+02 5.80e+00 2.80e+00 5.10e+00 2.40e+00]\n",
+ " [1.16e+02 6.40e+00 3.20e+00 5.30e+00 2.30e+00]\n",
+ " [1.17e+02 6.50e+00 3.00e+00 5.50e+00 1.80e+00]\n",
+ " [1.18e+02 7.70e+00 3.80e+00 6.70e+00 2.20e+00]\n",
+ " [1.19e+02 7.70e+00 2.60e+00 6.90e+00 2.30e+00]\n",
+ " [1.20e+02 6.00e+00 2.20e+00 5.00e+00 1.50e+00]\n",
+ " [1.21e+02 6.90e+00 3.20e+00 5.70e+00 2.30e+00]\n",
+ " [1.22e+02 5.60e+00 2.80e+00 4.90e+00 2.00e+00]\n",
+ " [1.23e+02 7.70e+00 2.80e+00 6.70e+00 2.00e+00]\n",
+ " [1.24e+02 6.30e+00 2.70e+00 4.90e+00 1.80e+00]\n",
+ " [1.25e+02 6.70e+00 3.30e+00 5.70e+00 2.10e+00]\n",
+ " [1.26e+02 7.20e+00 3.20e+00 6.00e+00 1.80e+00]\n",
+ " [1.27e+02 6.20e+00 2.80e+00 4.80e+00 1.80e+00]\n",
+ " [1.28e+02 6.10e+00 3.00e+00 4.90e+00 1.80e+00]\n",
+ " [1.29e+02 6.40e+00 2.80e+00 5.60e+00 2.10e+00]\n",
+ " [1.30e+02 7.20e+00 3.00e+00 5.80e+00 1.60e+00]\n",
+ " [1.31e+02 7.40e+00 2.80e+00 6.10e+00 1.90e+00]\n",
+ " [1.32e+02 7.90e+00 3.80e+00 6.40e+00 2.00e+00]\n",
+ " [1.33e+02 6.40e+00 2.80e+00 5.60e+00 2.20e+00]\n",
+ " [1.34e+02 6.30e+00 2.80e+00 5.10e+00 1.50e+00]\n",
+ " [1.35e+02 6.10e+00 2.60e+00 5.60e+00 1.40e+00]\n",
+ " [1.36e+02 7.70e+00 3.00e+00 6.10e+00 2.30e+00]\n",
+ " [1.37e+02 6.30e+00 3.40e+00 5.60e+00 2.40e+00]\n",
+ " [1.38e+02 6.40e+00 3.10e+00 5.50e+00 1.80e+00]\n",
+ " [1.39e+02 6.00e+00 3.00e+00 4.80e+00 1.80e+00]\n",
+ " [1.40e+02 6.90e+00 3.10e+00 5.40e+00 2.10e+00]\n",
+ " [1.41e+02 6.70e+00 3.10e+00 5.60e+00 2.40e+00]\n",
+ " [1.42e+02 6.90e+00 3.10e+00 5.10e+00 2.30e+00]\n",
+ " [1.43e+02 5.80e+00 2.70e+00 5.10e+00 1.90e+00]\n",
+ " [1.44e+02 6.80e+00 3.20e+00 5.90e+00 2.30e+00]\n",
+ " [1.45e+02 6.70e+00 3.30e+00 5.70e+00 2.50e+00]\n",
+ " [1.46e+02 6.70e+00 3.00e+00 5.20e+00 2.30e+00]\n",
+ " [1.47e+02 6.30e+00 2.50e+00 5.00e+00 1.90e+00]\n",
+ " [1.48e+02 6.50e+00 3.00e+00 5.20e+00 2.00e+00]\n",
+ " [1.49e+02 6.20e+00 3.40e+00 5.40e+00 2.30e+00]\n",
+ " [1.50e+02 5.90e+00 3.00e+00 5.10e+00 1.80e+00]] ['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the feature matrix X followed by the label vector Y.\n",
+ "print(X,Y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Srinivas\\Anaconda3\\envs\\myenvs\\lib\\site-packages\\sklearn\\utils\\deprecation.py:66: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
+ " warnings.warn(msg, category=DeprecationWarning)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Treating missing values: replace NaNs in each feature column with that column's mean.\n",
+ "# SimpleImputer supersedes sklearn.preprocessing.Imputer, which is deprecated since\n",
+ "# scikit-learn 0.20 and removed in 0.22. It always imputes column-wise, so the old\n",
+ "# axis=0 argument is no longer needed.\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n",
+ "# fit() learns the per-column means; transform() writes them into the missing slots.\n",
+ "imputer = imputer.fit(X[:, 0:5])\n",
+ "X[:, 0:5] = imputer.transform(X[:, 0:5])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "An imputer is a predefined class in the scikit-learn package for filling in missing values. The imputer object is created with arguments that specify which values count as missing and the replacement strategy, which can be the mean, the median, or the most frequent value.\n",
+ "The imputer then needs to be fitted to our data, which is done with the 'fit' method.\n",
+ "Finally, the missing values in the data are replaced with the computed values using the 'transform' method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "# Hold out about one third of the rows for testing; the fixed random_state keeps\n",
+ "# the split identical on every run.\n",
+ "xtrain, xtest, ytrain, ytest = train_test_split(\n",
+ "    X, Y, test_size=0.3333333, random_state=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from math import sqrt\n",
+ "\n",
+ "\n",
+ "def euclidean_distance(row1, row2):\n",
+ "    \"\"\"Return the Euclidean distance between two numeric feature rows.\n",
+ "\n",
+ "    Every coordinate takes part in the sum. The rows passed in this notebook hold\n",
+ "    features only (labels are kept separately in ytrain/ytest), so no trailing\n",
+ "    element may be skipped.\n",
+ "\n",
+ "    Args:\n",
+ "        row1: sequence of numbers.\n",
+ "        row2: sequence of numbers, expected to have the same length as row1;\n",
+ "            if the lengths differ, the extra trailing elements are ignored.\n",
+ "\n",
+ "    Returns:\n",
+ "        float: square root of the summed squared coordinate differences\n",
+ "        (0.0 for empty rows).\n",
+ "    \"\"\"\n",
+ "    return sqrt(sum((a - b) ** 2 for a, b in zip(row1, row2)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Brute-force k-nearest-neighbours: label every test row by majority vote among\n",
+ "# the k closest training rows, and count how many votes match the true label.\n",
+ "# The loops run over the arrays themselves rather than fixed counts, so every row\n",
+ "# of the split is used whatever its size.\n",
+ "k = 5\n",
+ "correct = 0\n",
+ "total = 0\n",
+ "for test_row, true_label in zip(xtest, ytest):\n",
+ "    row1 = test_row.tolist()\n",
+ "    # Distance from this test row to each training row, paired with that row's label.\n",
+ "    distances = [\n",
+ "        (euclidean_distance(row1, train_row.tolist()), train_label)\n",
+ "        for train_row, train_label in zip(xtrain, ytrain)\n",
+ "    ]\n",
+ "    # Tuples sort by distance first, so the first k entries are the nearest neighbours.\n",
+ "    votes = [label for _, label in sorted(distances)[:k]]\n",
+ "    # most_common(1) yields [(label, count)] for the label with the most votes.\n",
+ "    res = Counter(votes).most_common(1)[0][0]\n",
+ "    if res == true_label:\n",
+ "        correct += 1\n",
+ "    total += 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Fraction of evaluated test rows whose majority-vote label matched the true label.\n",
+ "print(correct/total)"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "USING SCIKIT-LEARN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.neighbors import KNeighborsClassifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Five neighbours, Minkowski metric with p=2 (i.e. the Euclidean distance).\n",
+ "classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)\n",
+ "# fit() returns the fitted estimator itself, so predict() can be chained onto it.\n",
+ "prediction = classifier.fit(xtrain, ytrain).predict(xtest)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With p=2, the 'minkowski' metric is the Euclidean distance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
+ " 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-virginica' 'Iris-setosa' 'Iris-virginica' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# True species labels of the held-out test rows.\n",
+ "print(ytest)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
+ " 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
+ " 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+ " 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
+ " 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
+ " 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n",
+ " 'Iris-virginica' 'Iris-setosa' 'Iris-virginica' 'Iris-setosa'\n",
+ " 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'\n",
+ " 'Iris-virginica' 'Iris-virginica']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Labels predicted by the scikit-learn classifier for the same test rows.\n",
+ "print(prediction)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Both ytest and prediction contain the same labels."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.metrics import confusion_matrix\n",
+ "\n",
+ "# Cross-tabulate true labels against predicted labels, one row/column per class.\n",
+ "cm = confusion_matrix(y_true=ytest, y_pred=prediction)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[16, 0, 0],\n",
+ " [ 0, 19, 0],\n",
+ " [ 0, 0, 15]], dtype=int64)"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Bare expression: the notebook displays the confusion matrix as the cell's result.\n",
+ "cm"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The confusion matrix counts the correct and incorrect predictions for each class. All of its off-diagonal entries are zero, so there are no incorrect predictions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}