diff --git a/m04_machine_learning/m04_c02_linear_regression_and_cross_validation/data/brain_and_body_weight.txt b/m04_machine_learning/m04_c02_linear_regression/data/brain_and_body_weight.txt similarity index 100% rename from m04_machine_learning/m04_c02_linear_regression_and_cross_validation/data/brain_and_body_weight.txt rename to m04_machine_learning/m04_c02_linear_regression/data/brain_and_body_weight.txt diff --git a/m04_machine_learning/m04_c02_linear_regression/m04_c02_lab.ipynb b/m04_machine_learning/m04_c02_linear_regression/m04_c02_lab.ipynb new file mode 100644 index 0000000..4f1fed2 --- /dev/null +++ b/m04_machine_learning/m04_c02_linear_regression/m04_c02_lab.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "\"utfsm-logo\"\n", + "\n", + "# MAT281\n", + "### Aplicaciones de la Matemática en la Ingeniería" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Módulo 04\n", + "## Laboratorio Clase 02: Regresión Lineal" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Instrucciones\n", + "\n", + "\n", + "* Completa tus datos personales (nombre y rol USM) en la siguiente celda.\n", + "* La escala es de 0 a 4 considerando solo valores enteros.\n", + "* Debes _pushear_ tus cambios a tu repositorio personal del curso.\n", + "* Como respaldo, debes enviar un archivo .zip con el siguiente formato `mXX_cYY_lab_apellido_nombre.zip` a alonso.ogueda@gmail.com, debe contener todo lo necesario para que se ejecute correctamente cada celda, ya sea datos, imágenes, scripts, etc.\n", + "* Se evaluará:\n", + " - Soluciones\n", + " - Código\n", + " - Que Binder esté bien configurado.\n", + " - Al presionar `Kernel -> Restart Kernel and Run All Cells` deben ejecutarse todas las celdas sin error." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "__Nombre__:\n", + "\n", + "__Rol__:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import altair as alt\n", + "\n", + "from sklearn import datasets, linear_model\n", + "from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + "alt.themes.enable('opaque')\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "## Ejercicio 1: Diabetes" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Realizar análisis de regresión a los datos de diabetes disponibles en scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "diabetes = datasets.load_diabetes()\n", + "print(dir(diabetes)) ## Atributos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "print(diabetes.DESCR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "diabetes_df = (\n", + " pd.DataFrame(\n", + " diabetes.data,\n", + " columns=diabetes.feature_names\n", + " )\n", + " .assign(prog=diabetes.target)\n", + ")\n", + "\n", + "diabetes_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "#### Pregunta 1 (1 pto):\n", + "\n", + "* ¿Por qué la columna de sexo tiene esos valores?\n", + "* ¿Cuál es la columna a predecir?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "**---TU RESPUESTA VA AQUÍ--**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "#### Pregunta 2 (1 pto)\n", + "\n", + "Realiza una regresión lineal con todas las _features_ incluyendo intercepto." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "X = diabetes_df.drop(## FIX ME PLEASE ##).values\n", + "y = diabetes_df[## FIX ME PLEASE ##].values" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Ajusta el modelo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "regr = ## FIX ME PLEASE ##\n", + "regr.## FIX ME PLEASE ##" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Imprime el intercepto y los coeficientes luego de ajustar el modelo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "print(f\"Intercept: \\n{## FIX ME PLEASE ##}\\n\")\n", + "print(f\"Coefficients: \\n{## FIX ME PLEASE ##}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Haz una predicción del modelo con los datos `X`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "y_pred = ## FIX ME PLEASE ##" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Calcula e imprime el error cuadrático medio y el coeficiente de determinación de este modelo ajustado." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# Error cuadrático medio\n", + "print(f\"Mean squared error: {## FIX ME PLEASE ##:.2f}\\n\")\n", + "\n", + "# Coeficiente de determinación\n", + "print(f\"Coefficient of determination: {## FIX ME PLEASE ##:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "**Pregunta: ¿Qué tan bueno fue el ajuste del modelo?**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "**---TU RESPUESTA VA AQUÍ--**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Pregunta 3 (2 ptos).\n", + "\n", + "Realizar múltiples regresiones lineales utilizando una sola _feature_ a la vez. \n", + "\n", + "En cada iteración:\n", + "\n", + "- Crea un arreglo `X_i` con solo una feature filtrando `X`.\n", + "- Crea un modelo de regresión lineal con intercepto.\n", + "- Ajusta el modelo anterior.\n", + "- Genera una predicción con el modelo.\n", + "- Calcula e imprime las métricas de la pregunta anterior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "for i in range(X.shape[1]):\n", + " X_i = X[:, np.newaxis, i] # Protip! 
Trata de entender este paso por tu cuenta, es muy clever\n", + " regr_i = ## FIX ME PLEASE ##\n", + " regr_i.## FIX ME PLEASE ##\n", + " y_pred_i = ## FIX ME PLEASE ##\n", + " print(f\"{diabetes_df.columns[i]}:\")\n", + " print(f\"\\tCoefficients: {## FIX ME PLEASE ##}\")\n", + " print(f\"\\tIntercept: {## FIX ME PLEASE ##}\")\n", + " print(f\"\\tMean squared error: {## FIX ME PLEASE ##:.2f}\")\n", + " print(f\"\\tCoefficient of determination: {## FIX ME PLEASE ##:.2f}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "**Si tuvieras que escoger una sola _feature_, ¿Cuál sería? ¿Por qué?**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "**---TU RESPUESTA VA AQUÍ--**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Con la feature escogida haz el siguiente gráfico:\n", + "\n", + "- Scatter Plot\n", + "- Eje X: Valores de la feature escogida.\n", + "- Eje Y: Valores de la columna a predecir (target).\n", + "- En color rojo dibuja la recta correspondiente a la regresión lineal (utilizando `intercept_` y `coef_`).\n", + "- Coloca un título adecuado, nombre de los ejes, etc.\n", + "\n", + "Puedes utilizar `matplotlib` o `altair`, el que prefieras." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "## FIX ME PLEASE ##" + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python [conda env:ds]", + "language": "python", + "name": "conda-env-ds-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/m04_machine_learning/m04_c02_linear_regression_and_cross_validation/m04_c02_linear_regression_and_cross_validation.ipynb b/m04_machine_learning/m04_c02_linear_regression/m04_c02_linear_regression.ipynb similarity index 99% rename from m04_machine_learning/m04_c02_linear_regression_and_cross_validation/m04_c02_linear_regression_and_cross_validation.ipynb rename to m04_machine_learning/m04_c02_linear_regression/m04_c02_linear_regression.ipynb index f4a3a43..bd6f810 100644 --- a/m04_machine_learning/m04_c02_linear_regression_and_cross_validation/m04_c02_linear_regression_and_cross_validation.ipynb +++ b/m04_machine_learning/m04_c02_linear_regression/m04_c02_linear_regression.ipynb @@ -1847,7 +1847,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1min 37s ± 4.09 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "1min 36s ± 3.84 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -1899,7 +1899,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "225 ms ± 5.41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "235 ms ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -1951,7 +1951,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "17.6 µs ± 2.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "15.7 µs ± 121 ns per loop (mean ± std. 
dev. of 7 runs, 100000 loops each)\n" ] } ], @@ -2003,7 +2003,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "258 µs ± 26.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + "254 µs ± 2.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" ] } ], @@ -2562,7 +2562,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.6" }, "toc-autonumbering": false, "toc-showtags": false