{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib\n", "#matplotlib.use('Agg')\n", "path_data = '../../../../data/'\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "from mpl_toolkits.mplot3d import Axes3D\n", "import numpy as np\n", "import pandas as pd\n", "import math\n", "import scipy.stats as stats\n", "plt.style.use('fivethirtyeight')\n", "\n", "import warnings\n", "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def standard_units(x):\n", " return (x - np.mean(x))/np.std(x)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def distance(point1, point2):\n", " \"\"\"The distance between two arrays of numbers.\"\"\"\n", " return np.sqrt(np.sum((point1 - point2)**2))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def all_distances(training, point):\n", " \"\"\"The distance between p (an array of numbers) and the numbers in row i of attribute_table.\"\"\"\n", " attributes = training.drop('Class')\n", " def distance_from_point(row):\n", " return distance(point, np.array(row))\n", " #return attributes.apply(distance_from_point)\n", " return attributes.map(distance_from_point) #--- check map function" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def table_with_distances(training, point):\n", " \"\"\"A copy of the training table with the distance from each row to array p.\"\"\"\n", " #return training.with_column('Distance', all_distances(training, point))\n", " training['Distance'] = all_distances(training, point)\n", " return training" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def closest(training, point, k):\n", " \"\"\"A table containing the k closest rows in the training table to array p.\"\"\"\n", " with_dists = table_with_distances(training, point)\n", " sorted_by_distance = with_dists.sort_values(by=['Distance'])\n", " topk = sorted_by_distance.take(np.arange(k))\n", " return topk" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeBlood PressureSpecific GravityAlbuminSugarRed Blood CellsPus CellPus Cell clumpsBacteriaGlucose...Packed Cell VolumeWhite Blood Cell CountRed Blood Cell CountHypertensionDiabetes MellitusCoronary Artery DiseaseAppetitePedal EdemaAnemiaClass
048701.00540normalabnormalpresentnotpresent117...3267003.9yesnonopooryesyes1
153901.02020abnormalabnormalpresentnotpresent70...29121003.7yesyesnopoornoyes1
263701.01030abnormalabnormalpresentnotpresent380...3245003.8yesyesnopooryesno1
368801.01032normalabnormalpresentpresent157...16110002.6yesyesyespooryesno1
461801.01520abnormalabnormalnotpresentnotpresent173...2492003.2yesyesyespooryesyes1
..................................................................
15355801.02000normalnormalnotpresentnotpresent140...4767004.9nononogoodnono0
15442701.02500normalnormalnotpresentnotpresent75...5478006.2nononogoodnono0
15512801.02000normalnormalnotpresentnotpresent100...4966005.4nononogoodnono0
15617601.02500normalnormalnotpresentnotpresent114...5172005.9nononogoodnono0
15758801.02500normalnormalnotpresentnotpresent131...5368006.1nononogoodnono0
\n", "

158 rows × 25 columns

\n", "
" ], "text/plain": [ " Age Blood Pressure Specific Gravity Albumin Sugar Red Blood Cells \\\n", "0 48 70 1.005 4 0 normal \n", "1 53 90 1.020 2 0 abnormal \n", "2 63 70 1.010 3 0 abnormal \n", "3 68 80 1.010 3 2 normal \n", "4 61 80 1.015 2 0 abnormal \n", ".. ... ... ... ... ... ... \n", "153 55 80 1.020 0 0 normal \n", "154 42 70 1.025 0 0 normal \n", "155 12 80 1.020 0 0 normal \n", "156 17 60 1.025 0 0 normal \n", "157 58 80 1.025 0 0 normal \n", "\n", " Pus Cell Pus Cell clumps Bacteria Glucose ... Packed Cell Volume \\\n", "0 abnormal present notpresent 117 ... 32 \n", "1 abnormal present notpresent 70 ... 29 \n", "2 abnormal present notpresent 380 ... 32 \n", "3 abnormal present present 157 ... 16 \n", "4 abnormal notpresent notpresent 173 ... 24 \n", ".. ... ... ... ... ... ... \n", "153 normal notpresent notpresent 140 ... 47 \n", "154 normal notpresent notpresent 75 ... 54 \n", "155 normal notpresent notpresent 100 ... 49 \n", "156 normal notpresent notpresent 114 ... 51 \n", "157 normal notpresent notpresent 131 ... 53 \n", "\n", " White Blood Cell Count Red Blood Cell Count Hypertension \\\n", "0 6700 3.9 yes \n", "1 12100 3.7 yes \n", "2 4500 3.8 yes \n", "3 11000 2.6 yes \n", "4 9200 3.2 yes \n", ".. ... ... ... \n", "153 6700 4.9 no \n", "154 7800 6.2 no \n", "155 6600 5.4 no \n", "156 7200 5.9 no \n", "157 6800 6.1 no \n", "\n", " Diabetes Mellitus Coronary Artery Disease Appetite Pedal Edema Anemia \\\n", "0 no no poor yes yes \n", "1 yes no poor no yes \n", "2 yes no poor yes no \n", "3 yes yes poor yes no \n", "4 yes yes poor yes yes \n", ".. ... ... ... ... ... \n", "153 no no good no no \n", "154 no no good no no \n", "155 no no good no no \n", "156 no no good no no \n", "157 no no good no no \n", "\n", " Class \n", "0 1 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 1 \n", ".. ... \n", "153 0 \n", "154 0 \n", "155 0 \n", "156 0 \n", "157 0 \n", "\n", "[158 rows x 25 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ckd1 = pd.read_csv(path_data + 'ckd.csv')\n", "ckd1.rename(columns={'Blood Glucose Random':'Glucose'}, inplace=True)\n", "ckd1" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HemoglobinGlucoseWhite Blood Cell CountClass
0-0.865744-0.221549-0.5697681
1-1.457446-0.9475971.1626841
2-1.0049683.841231-1.2755821
3-2.8148790.3963640.8097771
4-2.0839540.6435290.2322931
...............
1530.7005260.133751-0.5697680
1540.978974-0.870358-0.2168610
1550.735332-0.484162-0.6018500
1560.178436-0.267893-0.4093560
1570.735332-0.005280-0.5376860
\n", "

158 rows × 4 columns

\n", "
" ], "text/plain": [ " Hemoglobin Glucose White Blood Cell Count Class\n", "0 -0.865744 -0.221549 -0.569768 1\n", "1 -1.457446 -0.947597 1.162684 1\n", "2 -1.004968 3.841231 -1.275582 1\n", "3 -2.814879 0.396364 0.809777 1\n", "4 -2.083954 0.643529 0.232293 1\n", ".. ... ... ... ...\n", "153 0.700526 0.133751 -0.569768 0\n", "154 0.978974 -0.870358 -0.216861 0\n", "155 0.735332 -0.484162 -0.601850 0\n", "156 0.178436 -0.267893 -0.409356 0\n", "157 0.735332 -0.005280 -0.537686 0\n", "\n", "[158 rows x 4 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ckd = pd.DataFrame({'Hemoglobin':standard_units(ckd1['Hemoglobin']), \n", " 'Glucose':standard_units(ckd1['Glucose']), \n", " 'White Blood Cell Count':standard_units(ckd1['White Blood Cell Count']), \n", " 'Class':ckd1['Class'].astype(str)})\n", "\n", "#type(ckd_su['Class'][0])\n", "\n", "ckd" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "color_table = pd.DataFrame(\n", " {'Class':np.array([1, 0]),\n", " 'Color':np.array(['darkblue', 'gold'])}, index=np.array([1,0]))\n", " \n", "color_table['Class'] = color_table['Class'].astype(str)\n", "\n", "ckd = pd.merge(ckd, color_table, on='Class')\n", "\n", "class_1 = ckd['Class']\n", "\n", "ckd.pop('Class')\n", "\n", "ckd.insert(0, 'Class', class_1)\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ClassHemoglobinGlucoseWhite Blood Cell CountColor
01-0.865744-0.221549-0.569768darkblue
11-1.457446-0.9475971.162684darkblue
21-1.0049683.841231-1.275582darkblue
31-2.8148790.3963640.809777darkblue
41-2.0839540.6435290.232293darkblue
..................
15300.7005260.133751-0.569768gold
15400.978974-0.870358-0.216861gold
15500.735332-0.484162-0.601850gold
15600.178436-0.267893-0.409356gold
15700.735332-0.005280-0.537686gold
\n", "

158 rows × 5 columns

\n", "
" ], "text/plain": [ " Class Hemoglobin Glucose White Blood Cell Count Color\n", "0 1 -0.865744 -0.221549 -0.569768 darkblue\n", "1 1 -1.457446 -0.947597 1.162684 darkblue\n", "2 1 -1.004968 3.841231 -1.275582 darkblue\n", "3 1 -2.814879 0.396364 0.809777 darkblue\n", "4 1 -2.083954 0.643529 0.232293 darkblue\n", ".. ... ... ... ... ...\n", "153 0 0.700526 0.133751 -0.569768 gold\n", "154 0 0.978974 -0.870358 -0.216861 gold\n", "155 0 0.735332 -0.484162 -0.601850 gold\n", "156 0 0.178436 -0.267893 -0.409356 gold\n", "157 0 0.735332 -0.005280 -0.537686 gold\n", "\n", "[158 rows x 5 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ckd" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "glucose_color_darkblue = ckd[ckd['Color'] == 'darkblue']\n", "glucose_color_gold = ckd[ckd['Color'] == 'gold']\n", "\n", "\n", "fig, ax = plt.subplots(figsize=(7,6))\n", "\n", "ax.scatter(glucose_color_darkblue['Hemoglobin'], \n", " glucose_color_darkblue['Glucose'], \n", " label='Color=darkblue', \n", " color='darkblue')\n", "\n", "ax.scatter(glucose_color_gold['Hemoglobin'], \n", " glucose_color_gold['Glucose'], \n", " label='Color=gold', \n", " color='gold')\n", "\n", "x_label = 'Hemoglobin'\n", "\n", "y_label = 'Glucose'\n", "\n", "y_vals = ax.get_yticks()\n", "\n", "plt.ylabel(y_label)\n", "\n", "ax.legend(loc='upper left')\n", "\n", "plt.xlabel(x_label)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "HemoG1 = ckd.drop(columns=['White Blood Cell Count'])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ClassHemoglobinGlucoseColor
01-0.865744-0.221549darkblue
11-1.457446-0.947597darkblue
21-1.0049683.841231darkblue
31-2.8148790.396364darkblue
41-2.0839540.643529darkblue
...............
15300.7005260.133751gold
15400.978974-0.870358gold
15500.735332-0.484162gold
15600.178436-0.267893gold
15700.735332-0.005280gold
\n", "

158 rows × 4 columns

\n", "
" ], "text/plain": [ " Class Hemoglobin Glucose Color\n", "0 1 -0.865744 -0.221549 darkblue\n", "1 1 -1.457446 -0.947597 darkblue\n", "2 1 -1.004968 3.841231 darkblue\n", "3 1 -2.814879 0.396364 darkblue\n", "4 1 -2.083954 0.643529 darkblue\n", ".. ... ... ... ...\n", "153 0 0.700526 0.133751 gold\n", "154 0 0.978974 -0.870358 gold\n", "155 0 0.735332 -0.484162 gold\n", "156 0 0.178436 -0.267893 gold\n", "157 0 0.735332 -0.005280 gold\n", "\n", "[158 rows x 4 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HemoG1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### step by step" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'HemoG1' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtraining\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mHemoG1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mpoint1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpoint2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1.5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'HemoG1' is not defined" ] } ], "source": [ "training = HemoG1\n", "point1 = point[0]\n", "point2 = point[1]\n", "point = np.array([0, 1.5])\n", "\n", "def distance(point1, point2):\n", " \"\"\"The distance between two arrays of numbers.\"\"\"\n", " return np.sqrt(np.sum((point1 - point2)**2))\n", "\n", "np.sqrt(np.sum((point1 - point2)**2))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'HemoG1' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtraining1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mHemoG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Class'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Color'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;31m#training1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#train = np.array([training[:,1], training[:,2]])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'HemoG1' is not defined" ] } ], "source": [ "training1 = HemoG1.drop(columns=['Class', 'Color'])\n", "#training1\n", "\n", "#train = np.array([training[:,1], training[:,2]])\n", "\n", "#point1 = np.array([0, 1.5])\n", "\n", "#train = []\n", "\n", "#for i in range(len(training)):\n", " #j = training['Hemoglobin'].iloc[i]\n", " #k = training['Glucose'].iloc[i]\n", " #print(j,k)\n", " #train.append([j, k])\n", " #i += 1\n", "\n", "#train\n", "\n", "#point1 = np.array([0, 1.5])\n", "\n", "point2 = np.array(training)\n", "\n", "#point2 = np.array([training])\n", "\n", "#np.sqrt(np.sum((point1 - point2)**2))\n", "\n", "#len(point2)\n", "\n", "point2\n", "\n", "train = []\n", "n=0\n", "\n", "def all_distances(training, point):\n", " attributes = trainin-g\n", " def distance_from_point(row):\n", " return distance(point, np.array(row))\n", " \n", " return np.array(attributes.apply(distance_from_point, axis=1))\n", "\n", "#distance_from_point(point2)\n", "\n", "all_distances(training1, point)\n", "\n", "#df['add'] = df.apply(np.sum, axis = 1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#training" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'DataFrame' object has no attribute 'map'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdistance_from_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mall_distances\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mall_distances\u001b[0;34m(training, point)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m#return attributes.apply(distance_from_point)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdistance_from_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mall_distances\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 5137\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5138\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'map'" ] } ], "source": [ "training = HemoG1\n", "point = np.array([0, 1.5])\n", "#point1 = point[0]\n", "#point2 = point[1]\n", "\n", "def all_distances(training, point):\n", " \"\"\"The distance between p (an array of numbers) and the numbers in row i of attribute_table.\"\"\"\n", " attributes = training.drop(columns=['Class', 'Color'])\n", "\n", " def distance_from_point(row=attributes):\n", " return distance(point, np.array(row))\n", "#return attributes.apply(distance_from_point)\n", "\n", " return attributes.map(distance_from_point)\n", "\n", "all_distances(training, point)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "training = HemoG1\n", "point = np.array([0, 1.5])\n", "\n", "#def table_with_distances(training, point):\n", "\"\"\"A copy of the training table with the distance from each row to array p.\"\"\"\n", "#return training.with_column('Distance', all_distances(training, point))\n", "training['Distance'] = all_distances(training, point)\n", "return training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "training = HemoG1\n", "point = np.array([0, 1.5])\n", "k = 1\n", "\n", "#def closest(training, point, k):\n", "\"\"\"A table containing the k closest rows in the training table to array p.\"\"\"\n", "\n", "with_dists = table_with_distances(training, point)\n", "\n", "sorted_by_distance = with_dists.sort_values(by=['Distance'])\n", "topk = sorted_by_distance.take(np.arange(k))\n", "topk" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def show_closest(point):\n", " \"\"\"point = array([x,y]) \n", " gives the coordinates of a new point\n", " shown in red\"\"\"\n", " \n", " HemoG2 = ckd.copy()\n", " HemoG1 = HemoG2.drop(columns=['White Blood Cell Count'])\n", " \n", " t = closest(HemoGl, point, 1)\n", " x_closest = t.row(0).item(1)\n", " y_closest = t.row(0).item(2)\n", " \n", " ckd.scatter('Hemoglobin', 'Glucose', group='Color')\n", " plt.scatter(point.item(0), point.item(1), color='red', s=30)\n", " plt.plot(np.array([point.item(0), x_closest]), np.array([point.item(1), y_closest]), color='k', lw=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alice = np.array([0, 1.5])\n", "show_closest(alice)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }