diff --git a/week 2/priyatam75_week2.ipynb b/week 2/priyatam75_week2.ipynb new file mode 100644 index 0000000..a111c45 --- /dev/null +++ b/week 2/priyatam75_week2.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# WEEK-2" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Learn about Counter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Counter is a class in the collections module of Python; it is generally used to count hashable objects. When it is given an iterable, it builds a dictionary-like mapping from each distinct element to the number of times that element occurs.\n", + "The most_common(n) method returns the n most frequent elements together with their counts, ordered from the most common to the least common.\n" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Implementing with Iris Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "the dataset has been downloaded from kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
015.13.51.40.2Iris-setosa
124.93.01.40.2Iris-setosa
234.73.21.30.2Iris-setosa
344.63.11.50.2Iris-setosa
455.03.61.40.2Iris-setosa
# In [4]: load the Iris data set (Kaggle 'Iris.csv') and preview the first rows.
data = pd.read_csv('Iris.csv')
data.head()

# In [5]: split into the feature matrix X and the target vector Y.
#
# The 'Id' column is deliberately NOT part of X. It is only the 1-based row
# number, and the rows of this file are grouped by species, so using it as a
# feature lets any distance-based model read the label straight off the row
# position (which is what produced a perfect score before).
# Columns are selected by name rather than by position so the split keeps
# working if the column order of the CSV changes.
FEATURE_COLUMNS = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = data[FEATURE_COLUMNS].values
Y = data['Species'].values
1.50e+00 4.00e-01]\n", + " [2.30e+01 4.60e+00 3.60e+00 1.00e+00 2.00e-01]\n", + " [2.40e+01 5.10e+00 3.30e+00 1.70e+00 5.00e-01]\n", + " [2.50e+01 4.80e+00 3.40e+00 1.90e+00 2.00e-01]\n", + " [2.60e+01 5.00e+00 3.00e+00 1.60e+00 2.00e-01]\n", + " [2.70e+01 5.00e+00 3.40e+00 1.60e+00 4.00e-01]\n", + " [2.80e+01 5.20e+00 3.50e+00 1.50e+00 2.00e-01]\n", + " [2.90e+01 5.20e+00 3.40e+00 1.40e+00 2.00e-01]\n", + " [3.00e+01 4.70e+00 3.20e+00 1.60e+00 2.00e-01]\n", + " [3.10e+01 4.80e+00 3.10e+00 1.60e+00 2.00e-01]\n", + " [3.20e+01 5.40e+00 3.40e+00 1.50e+00 4.00e-01]\n", + " [3.30e+01 5.20e+00 4.10e+00 1.50e+00 1.00e-01]\n", + " [3.40e+01 5.50e+00 4.20e+00 1.40e+00 2.00e-01]\n", + " [3.50e+01 4.90e+00 3.10e+00 1.50e+00 1.00e-01]\n", + " [3.60e+01 5.00e+00 3.20e+00 1.20e+00 2.00e-01]\n", + " [3.70e+01 5.50e+00 3.50e+00 1.30e+00 2.00e-01]\n", + " [3.80e+01 4.90e+00 3.10e+00 1.50e+00 1.00e-01]\n", + " [3.90e+01 4.40e+00 3.00e+00 1.30e+00 2.00e-01]\n", + " [4.00e+01 5.10e+00 3.40e+00 1.50e+00 2.00e-01]\n", + " [4.10e+01 5.00e+00 3.50e+00 1.30e+00 3.00e-01]\n", + " [4.20e+01 4.50e+00 2.30e+00 1.30e+00 3.00e-01]\n", + " [4.30e+01 4.40e+00 3.20e+00 1.30e+00 2.00e-01]\n", + " [4.40e+01 5.00e+00 3.50e+00 1.60e+00 6.00e-01]\n", + " [4.50e+01 5.10e+00 3.80e+00 1.90e+00 4.00e-01]\n", + " [4.60e+01 4.80e+00 3.00e+00 1.40e+00 3.00e-01]\n", + " [4.70e+01 5.10e+00 3.80e+00 1.60e+00 2.00e-01]\n", + " [4.80e+01 4.60e+00 3.20e+00 1.40e+00 2.00e-01]\n", + " [4.90e+01 5.30e+00 3.70e+00 1.50e+00 2.00e-01]\n", + " [5.00e+01 5.00e+00 3.30e+00 1.40e+00 2.00e-01]\n", + " [5.10e+01 7.00e+00 3.20e+00 4.70e+00 1.40e+00]\n", + " [5.20e+01 6.40e+00 3.20e+00 4.50e+00 1.50e+00]\n", + " [5.30e+01 6.90e+00 3.10e+00 4.90e+00 1.50e+00]\n", + " [5.40e+01 5.50e+00 2.30e+00 4.00e+00 1.30e+00]\n", + " [5.50e+01 6.50e+00 2.80e+00 4.60e+00 1.50e+00]\n", + " [5.60e+01 5.70e+00 2.80e+00 4.50e+00 1.30e+00]\n", + " [5.70e+01 6.30e+00 3.30e+00 4.70e+00 1.60e+00]\n", + " [5.80e+01 4.90e+00 2.40e+00 3.30e+00 
1.00e+00]\n", + " [5.90e+01 6.60e+00 2.90e+00 4.60e+00 1.30e+00]\n", + " [6.00e+01 5.20e+00 2.70e+00 3.90e+00 1.40e+00]\n", + " [6.10e+01 5.00e+00 2.00e+00 3.50e+00 1.00e+00]\n", + " [6.20e+01 5.90e+00 3.00e+00 4.20e+00 1.50e+00]\n", + " [6.30e+01 6.00e+00 2.20e+00 4.00e+00 1.00e+00]\n", + " [6.40e+01 6.10e+00 2.90e+00 4.70e+00 1.40e+00]\n", + " [6.50e+01 5.60e+00 2.90e+00 3.60e+00 1.30e+00]\n", + " [6.60e+01 6.70e+00 3.10e+00 4.40e+00 1.40e+00]\n", + " [6.70e+01 5.60e+00 3.00e+00 4.50e+00 1.50e+00]\n", + " [6.80e+01 5.80e+00 2.70e+00 4.10e+00 1.00e+00]\n", + " [6.90e+01 6.20e+00 2.20e+00 4.50e+00 1.50e+00]\n", + " [7.00e+01 5.60e+00 2.50e+00 3.90e+00 1.10e+00]\n", + " [7.10e+01 5.90e+00 3.20e+00 4.80e+00 1.80e+00]\n", + " [7.20e+01 6.10e+00 2.80e+00 4.00e+00 1.30e+00]\n", + " [7.30e+01 6.30e+00 2.50e+00 4.90e+00 1.50e+00]\n", + " [7.40e+01 6.10e+00 2.80e+00 4.70e+00 1.20e+00]\n", + " [7.50e+01 6.40e+00 2.90e+00 4.30e+00 1.30e+00]\n", + " [7.60e+01 6.60e+00 3.00e+00 4.40e+00 1.40e+00]\n", + " [7.70e+01 6.80e+00 2.80e+00 4.80e+00 1.40e+00]\n", + " [7.80e+01 6.70e+00 3.00e+00 5.00e+00 1.70e+00]\n", + " [7.90e+01 6.00e+00 2.90e+00 4.50e+00 1.50e+00]\n", + " [8.00e+01 5.70e+00 2.60e+00 3.50e+00 1.00e+00]\n", + " [8.10e+01 5.50e+00 2.40e+00 3.80e+00 1.10e+00]\n", + " [8.20e+01 5.50e+00 2.40e+00 3.70e+00 1.00e+00]\n", + " [8.30e+01 5.80e+00 2.70e+00 3.90e+00 1.20e+00]\n", + " [8.40e+01 6.00e+00 2.70e+00 5.10e+00 1.60e+00]\n", + " [8.50e+01 5.40e+00 3.00e+00 4.50e+00 1.50e+00]\n", + " [8.60e+01 6.00e+00 3.40e+00 4.50e+00 1.60e+00]\n", + " [8.70e+01 6.70e+00 3.10e+00 4.70e+00 1.50e+00]\n", + " [8.80e+01 6.30e+00 2.30e+00 4.40e+00 1.30e+00]\n", + " [8.90e+01 5.60e+00 3.00e+00 4.10e+00 1.30e+00]\n", + " [9.00e+01 5.50e+00 2.50e+00 4.00e+00 1.30e+00]\n", + " [9.10e+01 5.50e+00 2.60e+00 4.40e+00 1.20e+00]\n", + " [9.20e+01 6.10e+00 3.00e+00 4.60e+00 1.40e+00]\n", + " [9.30e+01 5.80e+00 2.60e+00 4.00e+00 1.20e+00]\n", + " [9.40e+01 5.00e+00 2.30e+00 3.30e+00 1.00e+00]\n", + " 
[9.50e+01 5.60e+00 2.70e+00 4.20e+00 1.30e+00]\n", + " [9.60e+01 5.70e+00 3.00e+00 4.20e+00 1.20e+00]\n", + " [9.70e+01 5.70e+00 2.90e+00 4.20e+00 1.30e+00]\n", + " [9.80e+01 6.20e+00 2.90e+00 4.30e+00 1.30e+00]\n", + " [9.90e+01 5.10e+00 2.50e+00 3.00e+00 1.10e+00]\n", + " [1.00e+02 5.70e+00 2.80e+00 4.10e+00 1.30e+00]\n", + " [1.01e+02 6.30e+00 3.30e+00 6.00e+00 2.50e+00]\n", + " [1.02e+02 5.80e+00 2.70e+00 5.10e+00 1.90e+00]\n", + " [1.03e+02 7.10e+00 3.00e+00 5.90e+00 2.10e+00]\n", + " [1.04e+02 6.30e+00 2.90e+00 5.60e+00 1.80e+00]\n", + " [1.05e+02 6.50e+00 3.00e+00 5.80e+00 2.20e+00]\n", + " [1.06e+02 7.60e+00 3.00e+00 6.60e+00 2.10e+00]\n", + " [1.07e+02 4.90e+00 2.50e+00 4.50e+00 1.70e+00]\n", + " [1.08e+02 7.30e+00 2.90e+00 6.30e+00 1.80e+00]\n", + " [1.09e+02 6.70e+00 2.50e+00 5.80e+00 1.80e+00]\n", + " [1.10e+02 7.20e+00 3.60e+00 6.10e+00 2.50e+00]\n", + " [1.11e+02 6.50e+00 3.20e+00 5.10e+00 2.00e+00]\n", + " [1.12e+02 6.40e+00 2.70e+00 5.30e+00 1.90e+00]\n", + " [1.13e+02 6.80e+00 3.00e+00 5.50e+00 2.10e+00]\n", + " [1.14e+02 5.70e+00 2.50e+00 5.00e+00 2.00e+00]\n", + " [1.15e+02 5.80e+00 2.80e+00 5.10e+00 2.40e+00]\n", + " [1.16e+02 6.40e+00 3.20e+00 5.30e+00 2.30e+00]\n", + " [1.17e+02 6.50e+00 3.00e+00 5.50e+00 1.80e+00]\n", + " [1.18e+02 7.70e+00 3.80e+00 6.70e+00 2.20e+00]\n", + " [1.19e+02 7.70e+00 2.60e+00 6.90e+00 2.30e+00]\n", + " [1.20e+02 6.00e+00 2.20e+00 5.00e+00 1.50e+00]\n", + " [1.21e+02 6.90e+00 3.20e+00 5.70e+00 2.30e+00]\n", + " [1.22e+02 5.60e+00 2.80e+00 4.90e+00 2.00e+00]\n", + " [1.23e+02 7.70e+00 2.80e+00 6.70e+00 2.00e+00]\n", + " [1.24e+02 6.30e+00 2.70e+00 4.90e+00 1.80e+00]\n", + " [1.25e+02 6.70e+00 3.30e+00 5.70e+00 2.10e+00]\n", + " [1.26e+02 7.20e+00 3.20e+00 6.00e+00 1.80e+00]\n", + " [1.27e+02 6.20e+00 2.80e+00 4.80e+00 1.80e+00]\n", + " [1.28e+02 6.10e+00 3.00e+00 4.90e+00 1.80e+00]\n", + " [1.29e+02 6.40e+00 2.80e+00 5.60e+00 2.10e+00]\n", + " [1.30e+02 7.20e+00 3.00e+00 5.80e+00 1.60e+00]\n", + " [1.31e+02 7.40e+00 
2.80e+00 6.10e+00 1.90e+00]\n", + " [1.32e+02 7.90e+00 3.80e+00 6.40e+00 2.00e+00]\n", + " [1.33e+02 6.40e+00 2.80e+00 5.60e+00 2.20e+00]\n", + " [1.34e+02 6.30e+00 2.80e+00 5.10e+00 1.50e+00]\n", + " [1.35e+02 6.10e+00 2.60e+00 5.60e+00 1.40e+00]\n", + " [1.36e+02 7.70e+00 3.00e+00 6.10e+00 2.30e+00]\n", + " [1.37e+02 6.30e+00 3.40e+00 5.60e+00 2.40e+00]\n", + " [1.38e+02 6.40e+00 3.10e+00 5.50e+00 1.80e+00]\n", + " [1.39e+02 6.00e+00 3.00e+00 4.80e+00 1.80e+00]\n", + " [1.40e+02 6.90e+00 3.10e+00 5.40e+00 2.10e+00]\n", + " [1.41e+02 6.70e+00 3.10e+00 5.60e+00 2.40e+00]\n", + " [1.42e+02 6.90e+00 3.10e+00 5.10e+00 2.30e+00]\n", + " [1.43e+02 5.80e+00 2.70e+00 5.10e+00 1.90e+00]\n", + " [1.44e+02 6.80e+00 3.20e+00 5.90e+00 2.30e+00]\n", + " [1.45e+02 6.70e+00 3.30e+00 5.70e+00 2.50e+00]\n", + " [1.46e+02 6.70e+00 3.00e+00 5.20e+00 2.30e+00]\n", + " [1.47e+02 6.30e+00 2.50e+00 5.00e+00 1.90e+00]\n", + " [1.48e+02 6.50e+00 3.00e+00 5.20e+00 2.00e+00]\n", + " [1.49e+02 6.20e+00 3.40e+00 5.40e+00 2.30e+00]\n", + " [1.50e+02 5.90e+00 3.00e+00 5.10e+00 1.80e+00]] ['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'\n", + " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'\n", + " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 
# In [6]: sanity-check the split.
# Only the shapes and the first few rows are shown; printing the complete
# arrays floods the notebook with 150 rows of output and hides the narrative.
print(X.shape, Y.shape)
print(X[:5], Y[:5])

# In [7]: treating missing values.
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and is
# removed in 0.22; `sklearn.impute.SimpleImputer` is its replacement.
# SimpleImputer always works column-wise, which is what `axis=0` requested
# from the old class, and missing entries are identified with `np.nan`
# instead of the string "NaN".
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
# fit_transform learns each column's mean and fills the missing entries in a
# single step; `imputer` stays bound to the fitted instance afterwards.
# NOTE(review): the means are computed on the full data set, i.e. before the
# train/test split below — confirm that is acceptable for this exercise.
X = imputer.fit_transform(X)
# In [8]: hold out one third of the samples for testing (fixed seed so the
# split is reproducible).
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3333333, random_state=0)

# In [9]: distance helper for the hand-written k-nearest-neighbours classifier.
from math import sqrt


def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two feature vectors.

    Parameters
    ----------
    row1, row2 : sequence of numbers
        Feature vectors of equal length. They must contain features only:
        in this notebook the class labels live in the separate
        ``ytrain`` / ``ytest`` arrays, so every component takes part in the
        distance. (The previous ``range(len(row1)-1)`` loop skipped the last
        component, which silently dropped one feature.)

    Returns
    -------
    float
        ``sqrt(sum((a - b)**2))`` over the paired components. If the rows
        differ in length, the extra components of the longer one are ignored.
    """
    return sqrt(sum((a - b) ** 2 for a, b in zip(row1, row2)))


# In [10]: classify every test sample by majority vote among its K nearest
# training samples and count how many predictions are right.
K = 5  # number of neighbours that vote; also used for the scikit-learn model


def knn_predict(train_rows, train_labels, query_row, k=K):
    """Return the majority label among the ``k`` training rows nearest to ``query_row``.

    Parameters
    ----------
    train_rows : sequence of feature vectors
    train_labels : sequence of labels, aligned with ``train_rows``
    query_row : feature vector to classify
    k : int, number of neighbours that vote (default ``K``)
    """
    distances = [
        (euclidean_distance(query_row, train_row), label)
        for train_row, label in zip(train_rows, train_labels)
    ]
    # Sort on the distance only, so equal distances never fall back to
    # comparing the labels themselves.
    nearest = sorted(distances, key=lambda pair: pair[0])[:k]
    votes = [label for _, label in nearest]
    return Counter(votes).most_common(1)[0][0]


# Convert once, outside the loop, instead of once per (test, train) pair.
train_rows = xtrain.tolist()

correct = 0
total = 0
# Iterate over the arrays themselves rather than hard-coded sizes: the old
# `range(45)` evaluated only the first 45 test samples although the split
# produces more, and `range(100)` was right only for this exact data size.
for query_row, true_label in zip(xtest.tolist(), ytest):
    if knn_predict(train_rows, ytrain, query_row) == true_label:
        correct += 1
    total += 1

# In [11]: accuracy of the hand-written classifier on the test set.
print(correct / total)

# USING SCIKIT-LEARN
# In [12]
import matplotlib.pyplot as plt

# In [13]
from sklearn.neighbors import KNeighborsClassifier

# In [16]: same model via scikit-learn. The Minkowski metric with p=2 is the
# Euclidean distance, so this matches the hand-written version above.
classifier = KNeighborsClassifier(n_neighbors=K, metric='minkowski', p=2)
classifier.fit(xtrain, ytrain)
prediction = classifier.predict(xtest)

# In [18]: true labels of the test set, for comparison with `prediction`.
print(ytest)
# In [19]: labels predicted by the scikit-learn classifier for the test set.
print(prediction)

# In [21]: cross-tabulate true against predicted labels.
# Entry [i, j] counts the test samples whose true class is i and whose
# predicted class is j, so every off-diagonal entry is a misclassification.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=ytest, y_pred=prediction)

# In [22]: display the matrix.
cm