diff --git a/.ipynb_checkpoints/Limpieza de datos-checkpoint.ipynb b/.ipynb_checkpoints/Limpieza de datos-checkpoint.ipynb
new file mode 100644
index 0000000..f40fe1c
--- /dev/null
+++ b/.ipynb_checkpoints/Limpieza de datos-checkpoint.ipynb
@@ -0,0 +1,539 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurements.csv\") #cargamos el csv."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ " refill liters | \n",
+ " refill gas | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28 | \n",
+ " 5 | \n",
+ " 26 | \n",
+ " 21,5 | \n",
+ " 12 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 45 | \n",
+ " E10 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12 | \n",
+ " 4,2 | \n",
+ " 30 | \n",
+ " 21,5 | \n",
+ " 13 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11,2 | \n",
+ " 5,5 | \n",
+ " 38 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12,9 | \n",
+ " 3,9 | \n",
+ " 36 | \n",
+ " 21,5 | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18,5 | \n",
+ " 4,5 | \n",
+ " 46 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28 5 26 21,5 12 NaN E10 0 \n",
+ "1 12 4,2 30 21,5 13 NaN E10 0 \n",
+ "2 11,2 5,5 38 21,5 15 NaN E10 0 \n",
+ "3 12,9 3,9 36 21,5 14 NaN E10 0 \n",
+ "4 18,5 4,5 46 21,5 15 NaN E10 0 \n",
+ "\n",
+ " rain sun refill liters refill gas \n",
+ "0 0 0 45 E10 \n",
+ "1 0 0 NaN NaN \n",
+ "2 0 0 NaN NaN \n",
+ "3 0 0 NaN NaN \n",
+ "4 0 0 NaN NaN "
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exploro la forma del dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(388, 12)"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Veo la naturaleza de los datos:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 388 entries, 0 to 387\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null object\n",
+ " 1 consume 388 non-null object\n",
+ " 2 speed 388 non-null int64 \n",
+ " 3 temp_inside 376 non-null object\n",
+ " 4 temp_outside 388 non-null int64 \n",
+ " 5 specials 93 non-null object\n",
+ " 6 gas_type 388 non-null object\n",
+ " 7 AC 388 non-null int64 \n",
+ " 8 rain 388 non-null int64 \n",
+ " 9 sun 388 non-null int64 \n",
+ " 10 refill liters 13 non-null object\n",
+ " 11 refill gas 13 non-null object\n",
+ "dtypes: int64(5), object(7)\n",
+ "memory usage: 36.5+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compruebo que no hay duplicados:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=df.drop_duplicates() #no hay duplicados."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(388, 12)"
+ ]
+ },
+ "execution_count": 109,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compruebo dónde hay valores nulos:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "distance 0\n",
+ "consume 0\n",
+ "speed 0\n",
+ "temp_inside 12\n",
+ "temp_outside 0\n",
+ "specials 295\n",
+ "gas_type 0\n",
+ "AC 0\n",
+ "rain 0\n",
+ "sun 0\n",
+ "refill liters 375\n",
+ "refill gas 375\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Relleno los nulos con \"Unknown\" al tratarse de variables categóricas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 388 entries, 0 to 387\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null object\n",
+ " 1 consume 388 non-null object\n",
+ " 2 speed 388 non-null int64 \n",
+ " 3 temp_inside 376 non-null object\n",
+ " 4 temp_outside 388 non-null int64 \n",
+ " 5 specials 93 non-null object\n",
+ " 6 gas_type 388 non-null object\n",
+ " 7 AC 388 non-null int64 \n",
+ " 8 rain 388 non-null int64 \n",
+ " 9 sun 388 non-null int64 \n",
+ " 10 refill liters 13 non-null object\n",
+ " 11 refill gas 13 non-null object\n",
+ "dtypes: int64(5), object(7)\n",
+ "memory usage: 39.4+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The raw CSV writes decimals with a comma (e.g. \"21,5\"). Normalise them to dots\n",
+ "# first; otherwise to_numeric(errors='coerce') turns every such reading into NaN\n",
+ "# and the later mean-fill overwrites real measurements.\n",
+ "df['temp_inside'] = pd.to_numeric(\n",
+ "    df['temp_inside'].astype(str).str.replace(',', '.', regex=False),\n",
+ "    errors='coerce',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill the remaining NaNs with the column mean, since this is a numeric variable.\n",
+ "# The result is assigned back: calling fillna(inplace=True) on a selected column is\n",
+ "# chained assignment, which is deprecated and leaves df unchanged under Copy-on-Write.\n",
+ "df[\"temp_inside\"] = df[\"temp_inside\"].fillna(df[\"temp_inside\"].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill the NaNs in \"specials\" with \"Unknown\", since it is a categorical variable.\n",
+ "# Assigned back instead of inplace so the fill also takes effect under Copy-on-Write.\n",
+ "df[\"specials\"] = df[\"specials\"].fillna(\"Unknown\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#elimino las columnas de \"refill liters\" y \"refill gas\" por el alto porcentaje de Nans que contienen:\n",
+ "df.drop([\"refill liters\",\"refill gas\"], axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "distance 0\n",
+ "consume 0\n",
+ "speed 0\n",
+ "temp_inside 0\n",
+ "temp_outside 0\n",
+ "specials 0\n",
+ "gas_type 0\n",
+ "AC 0\n",
+ "rain 0\n",
+ "sun 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 118,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 215 | \n",
+ " 12,1 | \n",
+ " 4,4 | \n",
+ " 33 | \n",
+ " 21.934911 | \n",
+ " 5 | \n",
+ " Unknown | \n",
+ " SP98 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "215 12,1 4,4 33 21.934911 5 Unknown SP98 0 \n",
+ "\n",
+ " rain sun \n",
+ "215 0 0 "
+ ]
+ },
+ "execution_count": 120,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.sample()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Guardo el dataset limpio:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv('measurementsclean.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/.ipynb_checkpoints/Machine Learning- Predictions-checkpoint.ipynb b/.ipynb_checkpoints/Machine Learning- Predictions-checkpoint.ipynb
new file mode 100644
index 0000000..8d427b5
--- /dev/null
+++ b/.ipynb_checkpoints/Machine Learning- Predictions-checkpoint.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn import metrics\n",
+ "from sklearn.ensemble import GradientBoostingRegressor\n",
+ "from sklearn.linear_model import LinearRegression as LinReg\n",
+ "from sklearn.linear_model import Ridge, Lasso\n",
+ "from sklearn.linear_model import SGDRegressor\n",
+ "from sklearn.model_selection import cross_val_score as cvs\n",
+ "from sklearn.model_selection import train_test_split as tts\n",
+ "from sklearn.neighbors import KNeighborsRegressor\n",
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.svm import SVR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurementsclean.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 28.0 | \n",
+ " 5.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 12.0 | \n",
+ " 4.2 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " 11.2 | \n",
+ " 5.5 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " 12.9 | \n",
+ " 3.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " 18.5 | \n",
+ " 4.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 distance consume speed temp_inside temp_outside specials \\\n",
+ "0 0 28.0 5.0 26 21.934911 12 Unknown \n",
+ "1 1 12.0 4.2 30 21.934911 13 Unknown \n",
+ "2 2 11.2 5.5 38 21.934911 15 Unknown \n",
+ "3 3 12.9 3.9 36 21.934911 14 Unknown \n",
+ "4 4 18.5 4.5 46 21.934911 15 Unknown \n",
+ "\n",
+ " gas_type AC rain sun \n",
+ "0 E10 0 0 0 \n",
+ "1 E10 0 0 0 \n",
+ "2 E10 0 0 0 \n",
+ "3 E10 0 0 0 \n",
+ "4 E10 0 0 0 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12.0 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11.2 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance speed temp_inside temp_outside specials gas_type AC rain sun\n",
+ "0 28.0 26 21.934911 12 Unknown E10 0 0 0\n",
+ "1 12.0 30 21.934911 13 Unknown E10 0 0 0\n",
+ "2 11.2 38 21.934911 15 Unknown E10 0 0 0\n",
+ "3 12.9 36 21.934911 14 Unknown E10 0 0 0\n",
+ "4 18.5 46 21.934911 15 Unknown E10 0 0 0"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X = df.drop([\"Unnamed: 0\",\"consume\"], axis=1) \n",
+ "X.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y=df.consume"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Convierto las columnas categóricas a numéricas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 388 entries, 0 to 387\n",
+ "Data columns (total 9 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null float64\n",
+ " 1 speed 388 non-null int64 \n",
+ " 2 temp_inside 388 non-null float64\n",
+ " 3 temp_outside 388 non-null int64 \n",
+ " 4 specials 388 non-null object \n",
+ " 5 gas_type 388 non-null object \n",
+ " 6 AC 388 non-null int64 \n",
+ " 7 rain 388 non-null int64 \n",
+ " 8 sun 388 non-null int64 \n",
+ "dtypes: float64(2), int64(5), object(2)\n",
+ "memory usage: 27.4+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "X.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Unknown', 'AC rain', 'AC', 'rain', 'snow', 'AC snow',\n",
+ " 'half rain half sun', 'sun', 'AC sun', 'sun ac', 'ac', 'AC Sun',\n",
+ " 'ac rain'], dtype=object)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X.specials.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dic_specials ={\"AC rain\":1,\n",
+ " \"ac rain\":1,\n",
+ " \"AC\":2,\n",
+ " \"ac\":2,\n",
+ " \"rain\":1,\n",
+ " \"snow\":3,\n",
+ " \"AC snow\":3,\n",
+ " \"half rain half sun\":4,\n",
+ " \"sun\": 5,\n",
+ " \"AC sun\": 5,\n",
+ " \"sun ac\": 5,\n",
+ " \"AC Sun\":5,\n",
+ " \"Unknown\":7} "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X.specials = X.specials.map(dic_specials)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['E10', 'SP98'], dtype=object)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X[\"gas_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dicc_gas={\"E10\":0,\"SP98\":1}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X[\"gas_type\"] = X[\"gas_type\"].map(dicc_gas)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for evaluation. The seed is fixed so the split, and\n",
+ "# therefore the metrics reported below, are reproducible across kernel restarts.\n",
+ "X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Candidate regressors, keyed by the short name used in the training/evaluation loops.\n",
+ "models={\n",
+ "    'ridge': Ridge(),\n",
+ "    'lasso': Lasso(),\n",
+ "    # SGD is sensitive to feature scale: fitted on the raw features it diverges\n",
+ "    # (errors around 1e12), so the inputs are standardised inside a pipeline.\n",
+ "    'sgd': make_pipeline(StandardScaler(), SGDRegressor()),\n",
+ "    'knn': KNeighborsRegressor(),\n",
+ "    'grad': GradientBoostingRegressor(),\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ENTRENANDO: ridge\n",
+ "ENTRENANDO: lasso\n",
+ "ENTRENANDO: sgd\n",
+ "ENTRENANDO: knn\n",
+ "ENTRENANDO: grad\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " print(\"ENTRENANDO: \", name)\n",
+ " model.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "------ridge------\n",
+ "MAE - 0.5915556390285266\n",
+ "MSE - 0.6332090689017884\n",
+ "RMSE - 0.7957443489600089\n",
+ "R2 - -0.012519409694171424\n",
+ "------lasso------\n",
+ "MAE - 0.5863373677641668\n",
+ "MSE - 0.5938663515550878\n",
+ "RMSE - 0.7706272455312542\n",
+ "R2 - 0.05039072046668458\n",
+ "------sgd------\n",
+ "MAE - 2781777071543.0493\n",
+ "MSE - 2.41440185751784e+25\n",
+ "RMSE - 4913656334663.465\n",
+ "R2 - -3.860697617263027e+25\n",
+ "------knn------\n",
+ "MAE - 0.46461538461538454\n",
+ "MSE - 0.3931897435897436\n",
+ "RMSE - 0.6270484379932252\n",
+ "R2 - 0.37127835555520505\n",
+ "------grad------\n",
+ "MAE - 0.4349697461504338\n",
+ "MSE - 0.4202707359577462\n",
+ "RMSE - 0.6482829135167347\n",
+ "R2 - 0.32797507429623884\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " y_pred = model.predict(X_test)\n",
+ " print(f\"------{name}------\")\n",
+ " print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))\n",
+ " print('MSE - ', metrics.mean_squared_error(y_test, y_pred))\n",
+ " print('RMSE - ', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))\n",
+ " print('R2 - ', metrics.r2_score(y_test, y_pred))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Modelo: ridge Score: -0.24517033785848322\n",
+ "Modelo: lasso Score: -0.28426366190194124\n",
+ "Modelo: sgd Score: -1.3318385155302547e+25\n",
+ "Modelo: knn Score: 0.3493454625360017\n",
+ "Modelo: grad Score: 0.40623622869965026\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " scores=cvs(model, X, y, scoring='r2', cv=5)\n",
+ " print('Modelo: ', name, 'Score: ', np.mean(scores))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Me quedo con el modelo KNeighborsRegressor puesto que tiene el RMSE menor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/.ipynb_checkpoints/Visualizaciones-checkpoint.ipynb b/.ipynb_checkpoints/Visualizaciones-checkpoint.ipynb
new file mode 100644
index 0000000..6c763b6
--- /dev/null
+++ b/.ipynb_checkpoints/Visualizaciones-checkpoint.ipynb
@@ -0,0 +1,670 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Render Matplotlib figures inline in the notebook.\n",
+ "%matplotlib inline\n",
+ "# The configurable class is InlineBackend (case-sensitive); the previous spelling\n",
+ "# \"Inlinebackend\" matches no configurable, so the retina format was not applied.\n",
+ "%config InlineBackend.figure_format = 'retina'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Configuración para setear y que todas las fig de Seaborn salgan por defecto con este tamaño\n",
+ "# Se puede especificar el tamaño de cada figura\n",
+ "sns.set_context(\"poster\")\n",
+ "sns.set(rc={\"figure.figsize\": (12.,6.)})\n",
+ "sns.set_style(\"whitegrid\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurementsclean.csv\") #cargamos el csv."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.drop([\"Unnamed: 0\"], axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28.0 | \n",
+ " 5.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12.0 | \n",
+ " 4.2 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11.2 | \n",
+ " 5.5 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12.9 | \n",
+ " 3.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18.5 | \n",
+ " 4.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28.0 5.0 26 21.934911 12 Unknown E10 0 \n",
+ "1 12.0 4.2 30 21.934911 13 Unknown E10 0 \n",
+ "2 11.2 5.5 38 21.934911 15 Unknown E10 0 \n",
+ "3 12.9 3.9 36 21.934911 14 Unknown E10 0 \n",
+ "4 18.5 4.5 46 21.934911 15 Unknown E10 0 \n",
+ "\n",
+ " rain sun \n",
+ "0 0 0 \n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['E10', 'SP98'], dtype=object)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"gas_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Comparamos la velocidad media que alcanza cada combustible:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2 = df.groupby(\"gas_type\").agg({\"speed\": \"mean\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df3=df2.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Plot from the per-trip rows instead of the pre-aggregated df3: barplot already\n",
+ "# draws the mean per fuel, and ci=\"sd\" needs the individual observations to show\n",
+ "# a standard-deviation bar (df3 holds a single value per fuel).\n",
+ "barplot = sns.barplot(x=\"gas_type\", y=\"speed\", data=df, ci=\"sd\", palette=\"magma\");"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_dist = df.groupby(\"gas_type\").agg({\"distance\": \"mean\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ "
\n",
+ " \n",
+ " | gas_type | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | E10 | \n",
+ " 21.096250 | \n",
+ "
\n",
+ " \n",
+ " | SP98 | \n",
+ " 18.639912 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance\n",
+ "gas_type \n",
+ "E10 21.096250\n",
+ "SP98 18.639912"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_dist"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Relación entre la velocidad y el consumo para ambos combustibles:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.scatterplot(x=\"speed\", y=\"consume\", hue=\"gas_type\",data=df);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Estudio la correlación entre las diferentes variables:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "E10=df.loc[(df[\"gas_type\"]==\"E10\")]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Correlate only the numeric columns: E10 still carries the string columns\n",
+ "# \"specials\" and \"gas_type\", and DataFrame.corr() raises on those in pandas >= 2.0.\n",
+ "corrE10 = E10.select_dtypes(include=\"number\").corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | distance | \n",
+ " 1.000000 | \n",
+ " -0.172572 | \n",
+ " 0.633082 | \n",
+ " 0.137061 | \n",
+ " 0.158903 | \n",
+ " 0.045776 | \n",
+ " -0.073315 | \n",
+ " 0.027364 | \n",
+ "
\n",
+ " \n",
+ " | consume | \n",
+ " -0.172572 | \n",
+ " 1.000000 | \n",
+ " -0.233318 | \n",
+ " -0.040842 | \n",
+ " -0.322918 | \n",
+ " 0.043591 | \n",
+ " 0.248199 | \n",
+ " -0.160503 | \n",
+ "
\n",
+ " \n",
+ " | speed | \n",
+ " 0.633082 | \n",
+ " -0.233318 | \n",
+ " 1.000000 | \n",
+ " -0.014835 | \n",
+ " 0.091093 | \n",
+ " 0.125102 | \n",
+ " -0.001633 | \n",
+ " 0.128376 | \n",
+ "
\n",
+ " \n",
+ " | temp_inside | \n",
+ " 0.137061 | \n",
+ " -0.040842 | \n",
+ " -0.014835 | \n",
+ " 1.000000 | \n",
+ " 0.386506 | \n",
+ " 0.428083 | \n",
+ " 0.091396 | \n",
+ " 0.171009 | \n",
+ "
\n",
+ " \n",
+ " | temp_outside | \n",
+ " 0.158903 | \n",
+ " -0.322918 | \n",
+ " 0.091093 | \n",
+ " 0.386506 | \n",
+ " 1.000000 | \n",
+ " 0.048762 | \n",
+ " -0.097756 | \n",
+ " 0.236743 | \n",
+ "
\n",
+ " \n",
+ " | AC | \n",
+ " 0.045776 | \n",
+ " 0.043591 | \n",
+ " 0.125102 | \n",
+ " 0.428083 | \n",
+ " 0.048762 | \n",
+ " 1.000000 | \n",
+ " 0.336123 | \n",
+ " 0.171118 | \n",
+ "
\n",
+ " \n",
+ " | rain | \n",
+ " -0.073315 | \n",
+ " 0.248199 | \n",
+ " -0.001633 | \n",
+ " 0.091396 | \n",
+ " -0.097756 | \n",
+ " 0.336123 | \n",
+ " 1.000000 | \n",
+ " -0.094916 | \n",
+ "
\n",
+ " \n",
+ " | sun | \n",
+ " 0.027364 | \n",
+ " -0.160503 | \n",
+ " 0.128376 | \n",
+ " 0.171009 | \n",
+ " 0.236743 | \n",
+ " 0.171118 | \n",
+ " -0.094916 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside \\\n",
+ "distance 1.000000 -0.172572 0.633082 0.137061 0.158903 \n",
+ "consume -0.172572 1.000000 -0.233318 -0.040842 -0.322918 \n",
+ "speed 0.633082 -0.233318 1.000000 -0.014835 0.091093 \n",
+ "temp_inside 0.137061 -0.040842 -0.014835 1.000000 0.386506 \n",
+ "temp_outside 0.158903 -0.322918 0.091093 0.386506 1.000000 \n",
+ "AC 0.045776 0.043591 0.125102 0.428083 0.048762 \n",
+ "rain -0.073315 0.248199 -0.001633 0.091396 -0.097756 \n",
+ "sun 0.027364 -0.160503 0.128376 0.171009 0.236743 \n",
+ "\n",
+ " AC rain sun \n",
+ "distance 0.045776 -0.073315 0.027364 \n",
+ "consume 0.043591 0.248199 -0.160503 \n",
+ "speed 0.125102 -0.001633 0.128376 \n",
+ "temp_inside 0.428083 0.091396 0.171009 \n",
+ "temp_outside 0.048762 -0.097756 0.236743 \n",
+ "AC 1.000000 0.336123 0.171118 \n",
+ "rain 0.336123 1.000000 -0.094916 \n",
+ "sun 0.171118 -0.094916 1.000000 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "corrE10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Hide the upper triangle (diagonal included) so each variable pair appears once.\n",
+ "# The mask is sized from corrE10 itself; the name `corr` used before is not defined\n",
+ "# in any cell of this notebook and fails on a fresh kernel.\n",
+ "mascara = np.triu(np.ones_like(corrE10, dtype=bool))\n",
+ "color_map = sns.diverging_palette(0, 10, as_cmap=True)\n",
+ "sns.heatmap(corrE10,\n",
+ "            mask=mascara,\n",
+ "            cmap=color_map,\n",
+ "            square=True,\n",
+ "            linewidth=0.5,\n",
+ "            vmax=1,\n",
+ "            cbar_kws={\"shrink\": .5},\n",
+ "            annot=True);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "SP98=df.loc[(df[\"gas_type\"]==\"SP98\")]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Correlate only the numeric columns: SP98 still carries the string columns\n",
+ "# \"specials\" and \"gas_type\", and DataFrame.corr() raises on those in pandas >= 2.0.\n",
+ "corrSP98 = SP98.select_dtypes(include=\"number\").corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Hide the upper triangle (diagonal included) so each variable pair appears once.\n",
+ "# The mask is sized from corrSP98 itself; the name `corr` used before is not defined\n",
+ "# in any cell of this notebook and fails on a fresh kernel.\n",
+ "mascara = np.triu(np.ones_like(corrSP98, dtype=bool))\n",
+ "sns.heatmap(corrSP98,\n",
+ "            mask=mascara,\n",
+ "            cmap=\"viridis\",\n",
+ "            square=True,\n",
+ "            linewidth=0.5,\n",
+ "            vmax=1,\n",
+ "            cbar_kws={\"shrink\": .5},\n",
+ "            annot=True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Mostramos la distribución de la variable consumo para cada tipo de carburante:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.violinplot(x=df.consume,y=df[\"gas_type\"]);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Limpieza de datos.ipynb b/Limpieza de datos.ipynb
new file mode 100644
index 0000000..06bb15e
--- /dev/null
+++ b/Limpieza de datos.ipynb
@@ -0,0 +1,749 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurements.csv\") #cargamos el csv."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ " refill liters | \n",
+ " refill gas | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28 | \n",
+ " 5 | \n",
+ " 26 | \n",
+ " 21,5 | \n",
+ " 12 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 45 | \n",
+ " E10 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12 | \n",
+ " 4,2 | \n",
+ " 30 | \n",
+ " 21,5 | \n",
+ " 13 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11,2 | \n",
+ " 5,5 | \n",
+ " 38 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12,9 | \n",
+ " 3,9 | \n",
+ " 36 | \n",
+ " 21,5 | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18,5 | \n",
+ " 4,5 | \n",
+ " 46 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28 5 26 21,5 12 NaN E10 0 \n",
+ "1 12 4,2 30 21,5 13 NaN E10 0 \n",
+ "2 11,2 5,5 38 21,5 15 NaN E10 0 \n",
+ "3 12,9 3,9 36 21,5 14 NaN E10 0 \n",
+ "4 18,5 4,5 46 21,5 15 NaN E10 0 \n",
+ "\n",
+ " rain sun refill liters refill gas \n",
+ "0 0 0 45 E10 \n",
+ "1 0 0 NaN NaN \n",
+ "2 0 0 NaN NaN \n",
+ "3 0 0 NaN NaN \n",
+ "4 0 0 NaN NaN "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exploro la forma del dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(388, 12)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Veo la naturaleza de los datos y convierto a numéricos las dos primeras columnas (distancia y consumo):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 388 entries, 0 to 387\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null object\n",
+ " 1 consume 388 non-null object\n",
+ " 2 speed 388 non-null int64 \n",
+ " 3 temp_inside 376 non-null object\n",
+ " 4 temp_outside 388 non-null int64 \n",
+ " 5 specials 93 non-null object\n",
+ " 6 gas_type 388 non-null object\n",
+ " 7 AC 388 non-null int64 \n",
+ " 8 rain 388 non-null int64 \n",
+ " 9 sun 388 non-null int64 \n",
+ " 10 refill liters 13 non-null object\n",
+ " 11 refill gas 13 non-null object\n",
+ "dtypes: int64(5), object(7)\n",
+ "memory usage: 36.5+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def replace_coma(x):\n",
+ " return x.replace(\",\",\".\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lista_col=[\"distance\",\"consume\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Swap decimal commas for dots in each listed column, then cast the column to float.\n",
+ "for col in lista_col:\n",
+ "    df[col] = df[col].apply(replace_coma).astype(float)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# temp_inside uses decimal commas (e.g. 21,5). Normalise them to dots before parsing;\n",
+ "# otherwise errors='coerce' silently turns every comma-formatted reading into NaN.\n",
+ "df['temp_inside'] = pd.to_numeric(df['temp_inside'].str.replace(',', '.', regex=False), errors='coerce')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ " refill liters | \n",
+ " refill gas | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28.0 | \n",
+ " 5.0 | \n",
+ " 26 | \n",
+ " NaN | \n",
+ " 12 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 45 | \n",
+ " E10 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12.0 | \n",
+ " 4.2 | \n",
+ " 30 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11.2 | \n",
+ " 5.5 | \n",
+ " 38 | \n",
+ " NaN | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12.9 | \n",
+ " 3.9 | \n",
+ " 36 | \n",
+ " NaN | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18.5 | \n",
+ " 4.5 | \n",
+ " 46 | \n",
+ " NaN | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28.0 5.0 26 NaN 12 NaN E10 0 \n",
+ "1 12.0 4.2 30 NaN 13 NaN E10 0 \n",
+ "2 11.2 5.5 38 NaN 15 NaN E10 0 \n",
+ "3 12.9 3.9 36 NaN 14 NaN E10 0 \n",
+ "4 18.5 4.5 46 NaN 15 NaN E10 0 \n",
+ "\n",
+ " rain sun refill liters refill gas \n",
+ "0 0 0 45 E10 \n",
+ "1 0 0 NaN NaN \n",
+ "2 0 0 NaN NaN \n",
+ "3 0 0 NaN NaN \n",
+ "4 0 0 NaN NaN "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 388 entries, 0 to 387\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null float64\n",
+ " 1 consume 388 non-null float64\n",
+ " 2 speed 388 non-null int64 \n",
+ " 3 temp_inside 169 non-null float64\n",
+ " 4 temp_outside 388 non-null int64 \n",
+ " 5 specials 93 non-null object \n",
+ " 6 gas_type 388 non-null object \n",
+ " 7 AC 388 non-null int64 \n",
+ " 8 rain 388 non-null int64 \n",
+ " 9 sun 388 non-null int64 \n",
+ " 10 refill liters 13 non-null object \n",
+ " 11 refill gas 13 non-null object \n",
+ "dtypes: float64(3), int64(5), object(4)\n",
+ "memory usage: 36.5+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compruebo que no hay duplicados:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=df.drop_duplicates() #no hay duplicados."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(388, 12)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compruebo dónde hay valores nulos:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "distance 0\n",
+ "consume 0\n",
+ "speed 0\n",
+ "temp_inside 219\n",
+ "temp_outside 0\n",
+ "specials 295\n",
+ "gas_type 0\n",
+ "AC 0\n",
+ "rain 0\n",
+ "sun 0\n",
+ "refill liters 375\n",
+ "refill gas 375\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Relleno los nulos con \"Unknown\" al tratarse de variables categóricas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 388 entries, 0 to 387\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null float64\n",
+ " 1 consume 388 non-null float64\n",
+ " 2 speed 388 non-null int64 \n",
+ " 3 temp_inside 169 non-null float64\n",
+ " 4 temp_outside 388 non-null int64 \n",
+ " 5 specials 93 non-null object \n",
+ " 6 gas_type 388 non-null object \n",
+ " 7 AC 388 non-null int64 \n",
+ " 8 rain 388 non-null int64 \n",
+ " 9 sun 388 non-null int64 \n",
+ " 10 refill liters 13 non-null object \n",
+ " 11 refill gas 13 non-null object \n",
+ "dtypes: float64(3), int64(5), object(4)\n",
+ "memory usage: 39.4+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill missing temp_inside values with the column mean, since it is a numeric variable.\n",
+ "# Assign the result back instead of using a chained inplace fillna, which does not\n",
+ "# update df when pandas Copy-on-Write is enabled.\n",
+ "df[\"temp_inside\"] = df[\"temp_inside\"].fillna(df[\"temp_inside\"].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill missing values in \"specials\" with \"Unknown\", since it is a categorical variable.\n",
+ "# Assign the result back instead of using a chained inplace fillna, which does not\n",
+ "# update df when pandas Copy-on-Write is enabled.\n",
+ "df[\"specials\"] = df[\"specials\"].fillna(\"Unknown\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#elimino las columnas de \"refill liters\" y \"refill gas\" por el alto porcentaje de Nans que contienen:\n",
+ "df.drop([\"refill liters\",\"refill gas\"], axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "distance 0\n",
+ "consume 0\n",
+ "speed 0\n",
+ "temp_inside 0\n",
+ "temp_outside 0\n",
+ "specials 0\n",
+ "gas_type 0\n",
+ "AC 0\n",
+ "rain 0\n",
+ "sun 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 57 | \n",
+ " 12.3 | \n",
+ " 6.2 | \n",
+ " 61 | \n",
+ " 21.934911 | \n",
+ " 8 | \n",
+ " Unknown | \n",
+ " SP98 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "57 12.3 6.2 61 21.934911 8 Unknown SP98 0 \n",
+ "\n",
+ " rain sun \n",
+ "57 0 0 "
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.sample()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Guardo el dataset limpio:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv('measurementsclean.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Machine Learning- Predictions.ipynb b/Machine Learning- Predictions.ipynb
new file mode 100644
index 0000000..8d427b5
--- /dev/null
+++ b/Machine Learning- Predictions.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn.model_selection import train_test_split as tts\n",
+ "from sklearn import metrics\n",
+ "import numpy as np\n",
+ "from sklearn.linear_model import LinearRegression as LinReg\n",
+ "from sklearn.linear_model import Ridge, Lasso\n",
+ "from sklearn.linear_model import SGDRegressor\n",
+ "from sklearn.neighbors import KNeighborsRegressor\n",
+ "from sklearn.ensemble import GradientBoostingRegressor\n",
+ "from sklearn.svm import SVR\n",
+ "from sklearn.model_selection import cross_val_score as cvs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurementsclean.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 28.0 | \n",
+ " 5.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 12.0 | \n",
+ " 4.2 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " 11.2 | \n",
+ " 5.5 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " 12.9 | \n",
+ " 3.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " 18.5 | \n",
+ " 4.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 distance consume speed temp_inside temp_outside specials \\\n",
+ "0 0 28.0 5.0 26 21.934911 12 Unknown \n",
+ "1 1 12.0 4.2 30 21.934911 13 Unknown \n",
+ "2 2 11.2 5.5 38 21.934911 15 Unknown \n",
+ "3 3 12.9 3.9 36 21.934911 14 Unknown \n",
+ "4 4 18.5 4.5 46 21.934911 15 Unknown \n",
+ "\n",
+ " gas_type AC rain sun \n",
+ "0 E10 0 0 0 \n",
+ "1 E10 0 0 0 \n",
+ "2 E10 0 0 0 \n",
+ "3 E10 0 0 0 \n",
+ "4 E10 0 0 0 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12.0 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11.2 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance speed temp_inside temp_outside specials gas_type AC rain sun\n",
+ "0 28.0 26 21.934911 12 Unknown E10 0 0 0\n",
+ "1 12.0 30 21.934911 13 Unknown E10 0 0 0\n",
+ "2 11.2 38 21.934911 15 Unknown E10 0 0 0\n",
+ "3 12.9 36 21.934911 14 Unknown E10 0 0 0\n",
+ "4 18.5 46 21.934911 15 Unknown E10 0 0 0"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X = df.drop([\"Unnamed: 0\",\"consume\"], axis=1) \n",
+ "X.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y=df.consume"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Convierto las columnas categóricas a numéricas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 388 entries, 0 to 387\n",
+ "Data columns (total 9 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 distance 388 non-null float64\n",
+ " 1 speed 388 non-null int64 \n",
+ " 2 temp_inside 388 non-null float64\n",
+ " 3 temp_outside 388 non-null int64 \n",
+ " 4 specials 388 non-null object \n",
+ " 5 gas_type 388 non-null object \n",
+ " 6 AC 388 non-null int64 \n",
+ " 7 rain 388 non-null int64 \n",
+ " 8 sun 388 non-null int64 \n",
+ "dtypes: float64(2), int64(5), object(2)\n",
+ "memory usage: 27.4+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "X.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Unknown', 'AC rain', 'AC', 'rain', 'snow', 'AC snow',\n",
+ " 'half rain half sun', 'sun', 'AC sun', 'sun ac', 'ac', 'AC Sun',\n",
+ " 'ac rain'], dtype=object)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X.specials.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dic_specials ={\"AC rain\":1,\n",
+ " \"ac rain\":1,\n",
+ " \"AC\":2,\n",
+ " \"ac\":2,\n",
+ " \"rain\":1,\n",
+ " \"snow\":3,\n",
+ " \"AC snow\":3,\n",
+ " \"half rain half sun\":4,\n",
+ " \"sun\": 5,\n",
+ " \"AC sun\": 5,\n",
+ " \"sun ac\": 5,\n",
+ " \"AC Sun\":5,\n",
+ " \"Unknown\":7} "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X.specials = X.specials.map(dic_specials)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['E10', 'SP98'], dtype=object)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X[\"gas_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dicc_gas={\"E10\":0,\"SP98\":1}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X[\"gas_type\"] = X[\"gas_type\"].map(dicc_gas)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fix the seed so the train/test split, and every metric computed from it, is reproducible across runs.\n",
+ "X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "models={\n",
+ " 'ridge': Ridge(),\n",
+ " 'lasso': Lasso(),\n",
+ " 'sgd': SGDRegressor(),\n",
+ " 'knn': KNeighborsRegressor(),\n",
+ " 'grad': GradientBoostingRegressor(),\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ENTRENANDO: ridge\n",
+ "ENTRENANDO: lasso\n",
+ "ENTRENANDO: sgd\n",
+ "ENTRENANDO: knn\n",
+ "ENTRENANDO: grad\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " print(\"ENTRENANDO: \", name)\n",
+ " model.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "------ridge------\n",
+ "MAE - 0.5915556390285266\n",
+ "MSE - 0.6332090689017884\n",
+ "RMSE - 0.7957443489600089\n",
+ "R2 - -0.012519409694171424\n",
+ "------lasso------\n",
+ "MAE - 0.5863373677641668\n",
+ "MSE - 0.5938663515550878\n",
+ "RMSE - 0.7706272455312542\n",
+ "R2 - 0.05039072046668458\n",
+ "------sgd------\n",
+ "MAE - 2781777071543.0493\n",
+ "MSE - 2.41440185751784e+25\n",
+ "RMSE - 4913656334663.465\n",
+ "R2 - -3.860697617263027e+25\n",
+ "------knn------\n",
+ "MAE - 0.46461538461538454\n",
+ "MSE - 0.3931897435897436\n",
+ "RMSE - 0.6270484379932252\n",
+ "R2 - 0.37127835555520505\n",
+ "------grad------\n",
+ "MAE - 0.4349697461504338\n",
+ "MSE - 0.4202707359577462\n",
+ "RMSE - 0.6482829135167347\n",
+ "R2 - 0.32797507429623884\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " y_pred = model.predict(X_test)\n",
+ " print(f\"------{name}------\")\n",
+ " print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))\n",
+ " print('MSE - ', metrics.mean_squared_error(y_test, y_pred))\n",
+ " print('RMSE - ', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))\n",
+ " print('R2 - ', metrics.r2_score(y_test, y_pred))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Modelo: ridge Score: -0.24517033785848322\n",
+ "Modelo: lasso Score: -0.28426366190194124\n",
+ "Modelo: sgd Score: -1.3318385155302547e+25\n",
+ "Modelo: knn Score: 0.3493454625360017\n",
+ "Modelo: grad Score: 0.40623622869965026\n"
+ ]
+ }
+ ],
+ "source": [
+ "for name, model in models.items():\n",
+ " scores=cvs(model, X, y, scoring='r2', cv=5)\n",
+ " print('Modelo: ', name, 'Score: ', np.mean(scores))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Me quedo con el modelo KNeighborsRegressor puesto que tiene el RMSE menor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Visualizaciones.ipynb b/Visualizaciones.ipynb
new file mode 100644
index 0000000..6c763b6
--- /dev/null
+++ b/Visualizaciones.ipynb
@@ -0,0 +1,670 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Render Matplotlib figures inline, directly below the cell that creates them\n",
+ "%matplotlib inline\n",
+ "# The configurable class is `InlineBackend` (case-sensitive); a misspelled class name matches\n",
+ "# no configurable, so the high-DPI 'retina' figure format would never be applied\n",
+ "%config InlineBackend.figure_format = 'retina'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Global Seaborn defaults for every figure in this notebook (the figure size can still be\n",
+ "# overridden per plot). Everything is set in ONE call: sns.set() re-applies the plotting\n",
+ "# context, so calling sns.set_context() before it has no lasting effect.\n",
+ "# Use context=\"poster\" here if larger fonts and lines are wanted.\n",
+ "sns.set(context=\"notebook\", style=\"whitegrid\", rc={\"figure.figsize\": (12., 6.)})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"measurementsclean.csv\") #cargamos el csv."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Drop the row-index column that the CSV stores under the header \"Unnamed: 0\".\n",
+ "# Reassigning (instead of inplace=True) with errors=\"ignore\" keeps this cell safe to re-run.\n",
+ "df = df.drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 28.0 | \n",
+ " 5.0 | \n",
+ " 26 | \n",
+ " 21.934911 | \n",
+ " 12 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 12.0 | \n",
+ " 4.2 | \n",
+ " 30 | \n",
+ " 21.934911 | \n",
+ " 13 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11.2 | \n",
+ " 5.5 | \n",
+ " 38 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 12.9 | \n",
+ " 3.9 | \n",
+ " 36 | \n",
+ " 21.934911 | \n",
+ " 14 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18.5 | \n",
+ " 4.5 | \n",
+ " 46 | \n",
+ " 21.934911 | \n",
+ " 15 | \n",
+ " Unknown | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28.0 5.0 26 21.934911 12 Unknown E10 0 \n",
+ "1 12.0 4.2 30 21.934911 13 Unknown E10 0 \n",
+ "2 11.2 5.5 38 21.934911 15 Unknown E10 0 \n",
+ "3 12.9 3.9 36 21.934911 14 Unknown E10 0 \n",
+ "4 18.5 4.5 46 21.934911 15 Unknown E10 0 \n",
+ "\n",
+ " rain sun \n",
+ "0 0 0 \n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['E10', 'SP98'], dtype=object)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"gas_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Comparamos la velocidad media que alcanza cada combustible:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2 = df.groupby(\"gas_type\").agg({\"speed\": \"mean\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df3=df2.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtQAAAF5CAYAAAC/R71dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAc/UlEQVR4nO3da5SV5X3/4e/MMDMKSCYQGUaxNR4wREpFsWKqqy5o1FYiJG0KwajL1pqYph5yMCzUAY05QEhtJFpNajy0qa7YahHUQBBXo6kuD9UIEoO1mDY6QIEQI+owzMz/RZbzD/U0eDN7z+h1vWI/ez/7+e158azPurlnT013d3d3AACAt6S22gMAAMBAJqgBAKCAoAYAgAKCGgAACghqAAAoMKjaA5To6urKtm3bUl9fn5qammqPAwDA21B3d3c6OjoyZMiQ1Na+ej16QAf1tm3bsnbt2mqPAQDAO8CYMWOy1157ver4gA7q+vr6JL/+cA0NDVWeBgCAt6Pt27dn7dq1Pe35fw3ooH5lm0dDQ0MaGxurPA0AAG9nr7fF2C8lAgBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUG9G3S0d1R7BGCAcL8AePsZVO0B3g7qG+vzqd+5oNpjAAPAVasWVHsEAHYzK9QAAFBAUAMAQAFBDQAABQQ1AAAUENQAAFBAUAMAQAFBDQAABQQ1AAAUENQAAFBAUAMAQAFBDQAABQQ1AAAUENQAAFBAUAMAQAFBDQAABQQ1AAAUENQAAFBAUANQFV0dHdUeARgg+vv9YlC1BwDgnam2vj7rPnt+tccABoD3fv3yao/whiq+Qv3Nb34zhxxySNauXZskeeyxx3LyySfnhBNOyJ//+Z9n8+bNlR4JAADesooG9RNPPJHHHnss++67b5Kkq6srn//859Pa2pply5Zl4sSJWbhwYSVHAgCAIhUL6u3bt+fSSy/NvHnzeo6tXr06jY2NmThxYpJk5syZ+f73v1+pkQAAoFjFgvob3/hGTj755IwePbrnWFtbW/bZZ5+ex8OHD09XV1e2bt1aqbEAAKBIRX4p8dFHH83q1avzuc99rk/ef/Xq1X3yvr11xBFHVPX6wMDyyCOPVHuEfsG9E9gV/fneWZGgfuihh/L0009nypQpSZL169fnL/7iL3Lqqafmueee63ndli1bUltbm6ampl16/3HjxqWxsXF3jgzQZ4QkwK6r5r2zvb39DRdwK7Ll46yzzsp9992XlStXZuXKlRk1alSuvfbanHnmmXn55Zfz8MMPJ0luvvnmnHjiiZUYCQAAdouqfg91bW1tFixYkLlz56a9vT377rtvvva1r1VzJAAA2CVVCeqVK1f2/Pvwww/PkiVLqjEGAAAU86fHAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACghoAAAoIagAAKCCoAQCggKAGAIACgyp1oU996lP5+c9/ntra2gwePDgXX3xxxo4dm3Xr1mX27NnZunVrmpqaMn/+/Oy///6VGgsAAIpULKjnz5+fvfbaK0myYsWKzJkzJ7fddlvmzp2bWbNm
Zdq0aVm8eHFaW1tz4403VmosAAAoUrEtH6/EdJK88MILqampyebNm7NmzZpMnTo1STJ16tSsWbMmW7ZsqdRYAABQpGIr1Ely4YUX5kc/+lG6u7vz93//92lra0tzc3Pq6uqSJHV1dRk5cmTa2toyfPjwSo4GAABvSUWD+ktf+lKS5F//9V+zYMGCnHvuubvlfVevXr1b3uetOuKII6p6fWBgeeSRR6o9Qr/g3gnsiv5876xoUL9i+vTpaW1tzahRo7Jhw4Z0dnamrq4unZ2d2bhxY1paWnbp/caNG5fGxsY+mhZg9xKSALuumvfO9vb2N1zArcge6m3btqWtra3n8cqVK/Oud70rI0aMyNixY7N06dIkydKlSzN27FjbPQAAGDAqskL90ksv5dxzz81LL72U2travOtd78rVV1+dmpqazJs3L7Nnz85VV12VYcOGZf78+ZUYCQAAdouKBPV73vOefO9733vN5w488MDccsstlRgDAAB2O38pEQAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoIKgBAKCAoAYAgAKCGgAACghqAAAoMOiNnrz//vt79SZHH330bhkGAAAGmjcM6gsvvHCnxxs3bkySNDU1ZevWrUmS5ubm3H333X0zHQAA9HNvGNQrV67s+ffVV1+drVu35txzz82ee+6Zl156KVdccUWampr6ekYAAOi3er2H+vrrr89nP/vZ7LnnnkmSPffcM5/5zGdy3XXX9dlwAADQ3/U6qAcPHpzHH398p2OrVq3qCWwAAHgnesMtH7/pnHPOyZlnnpnJkydn1KhRWb9+fe655560trb25XwAANCv9Tqop0+fnnHjxmXZsmXZuHFj3vve9+bss8/OQQcd1JfzAQBAv9broE6Sgw46KAcccEA2bdqUkSNH9tVMAAAwYPR6D/Xzzz+fz372sxk/fnyOP/74JMndd9+dyy+/vM+GAwCA/q7XQT137twMHTo0K1euTH19fZJkwoQJueuuu/psOAAA6O96veXj/vvvz7333pv6+vrU1NQkSYYPH57Nmzf32XAAANDf9XqFeq+99sovfvGLnY4999xz2XvvvXf7UAAAMFD0Oqg/+tGP5pxzzskDDzyQrq6uPProo/nCF76QmTNn9uV8AADQr/V6y8df/uVfprGxMZdeeml27NiROXPmZMaMGTn99NP7cj4AAOjXeh3UNTU1Of300wU0AAD8hl36Huof/ehHueOOO7Jly5ZcffXVWbVqVV544YUcffTRfTUfAAD0a73eQ/0P//APmTdvXvbff/889NBDSZI99tgj3/jGN/psOAAA6O96HdQ33HBDrrvuupx11lmprf31aQcccEDWrVvXZ8MBAEB/1+ug3rZtW1paWpKk53uod+zY0fNHXgAA4J2o10F95JFH5lvf+tZOx2688cYcddRRu30oAAAYKHr9S4kXXXRRPvnJT+aWW27Jtm3bcsIJJ2TIkCG55ppr+nI+AADo13od1CNHjsy//Mu/ZNWqVXn22WfT0tKS8ePH9+ynBgCAd6JdquGurq50dHQkSTo7O9Pd3d0nQwEAwEDR6xXqJ598Mn/1V3+V7du3p7m5OevXr09jY2OuvPLKvO997+vLGQEAoN/qdVDPmTMnp5xySs4444zU1NSku7s7119/febMmZNbb721L2cEAIB+q9dbPp555pmcfvrpPV+ZV1NTk9NOOy3PPPNMX80GAAD9Xq+D+g/+4A+ycuXKnY7dc889Oe6443b3TAAAMGD0estHZ2dnzj///IwbNy6jRo3K+vXrs3r16kyZMiUXXHBBz+sWLFjQJ4MCAEB/1OugHjNmTMaMGdPz+KCDDsoxxxzTJ0MB
AMBA0eugnjhxYvbdd9/st99+2bhxYxYuXJi6urp85jOfyd57792XMwIAQL/V6z3Ul1xySerq6pIk8+fPT2dnZ2pqanLxxRf32XAAANDf9XqFesOGDdlnn32yY8eO3HvvvbnnnntSX1+fY489ti/nAwCAfq3XQT106NBs2rQpTz31VA466KAMGTIk27dvz44dO/pyPgAA6Nd6HdQf//jH86d/+qfp6OjInDlzkiT/8R//kQMOOOBNz/3FL36RCy64IP/93/+dhoaG/PZv/3YuvfTSDB8+PI899lhaW1vT3t6efffdN1/72tcyYsSIt/6JAACggnq9h/qss87Kddddl5tuuiknnXRSkqS5uTmXXXbZm55bU1OTM888M8uWLcuSJUuy3377ZeHChenq6srnP//5tLa2ZtmyZZk4cWIWLlz41j8NAABUWK+DOkne+9735rd+67d2enzIIYe86XlNTU056qijeh4fdthhee6557J69eo0NjZm4sSJSZKZM2fm+9///q6MBAAAVbVLQb07dHV15aabbsrkyZPT1taWffbZp+e54cOHp6urK1u3bq30WAAA8Jb0eg/17vLFL34xgwcPzsc//vH84Ac/2C3vuXr16t3yPm/VEUccUdXrAwPLI488Uu0R+gX3TmBX9Od7Z0WDev78+fnZz36Wq6++OrW1tWlpaclzzz3X8/yWLVtSW1ubpqamXXrfcePGpbGxcTdPC9A3hCTArqvmvbO9vf0NF3ArtuXjb/7mb7J69epceeWVaWhoSPLrEH755Zfz8MMPJ0luvvnmnHjiiZUaCQAAilVkhfqpp57KNddck/333z8zZ85MkowePTpXXnllFixYkLlz5+70tXkAADBQVCSoDz744Pz0pz99zecOP/zwLFmypBJjAADAblfxb/kAAIC3E0ENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFBDUAABQQ1AAAUEBQAwBAAUENAAAFKhLU8+fPz+TJk3PIIYdk7dq1PcfXrVuXGTNm5IQTTsiMGTPyzDPPVGIcAADYbSoS1FOmTMl3v/vd7Lvvvjsdnzt3bmbNmpVly5Zl1qxZaW1trcQ4AACw21QkqCdOnJiWlpadjm3evDlr1qzJ1KlTkyRTp07NmjVrsmXLlkqMBAAAu0XV9lC3tbWlubk5dXV1SZK6urqMHDkybW1t1RoJAAB22aBqD7A7rF69uqrXP+KII6p6fWBgeeSRR6o9Qr/g3gnsiv5876xaULe0tGTDhg3p7OxMXV1dOjs7s3HjxldtDemNcePGpbGxsQ+mBNj9hCTArqvmvbO9vf0NF3CrtuVjxIgRGTt2bJYuXZokWbp0acaOHZvhw4dXayQAANhlFVmhvuyyy7J8+fJs2rQpZ5xxRpqamnLHHXdk3rx5mT17dq666qoMGzYs8+fPr8Q4AACw21QkqC+66KJcdNFFrzp+4IEH5pZbbqnECAAA0Cf8pUQAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCA
AoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIaAAAKCGoAACggqAEAoEC/COp169ZlxowZOeGEEzJjxow888wz1R4JAAB6pV8E9dy5czNr1qwsW7Yss2bNSmtra7VHAgCAXql6UG/evDlr1qzJ1KlTkyRTp07NmjVrsmXLlipPBgAAb25QtQdoa2tLc3Nz6urqkiR1dXUZOXJk2traMnz48Dc8t7u7O0myffv2Pp/zzQx9z+BqjwAMAO3t7dUeoV/pHDKk2iMAA0C1752vtOYr7fl/VT2oS3R0dCRJ1q5dW+VJkhkLP1TtEYABYPXq1dUeoX+Z9uFqTwAMAL/sJ/fOjo6O7LHHHq86XvWgbmlpyYYNG9LZ2Zm6urp0dnZm48aNaWlpedNzhwwZkjFjxqS+vj41NTUVmBYAgHea7u7udHR0ZMjr/K9a1YN6xIgRGTt2bJYuXZpp06Zl6dKlGTt27Jtu90iS2tra7LXXXhWYEgCAd7LXWpl+RU33620GqaCnn346s2fPzvPPP59hw4Zl/vz5OeCAA6o9FgAAvKl+EdQAADBQVf1r8wAAYCAT1AAAUEBQAwBAAUENAAAFBDUAABSo+vdQw0A0efLkNDQ0pLGxsefYlVdeme9+97tZtmxZnn322SxZsiRjxozpeX7dunWZPXt2tm7dmqampsyfPz/7779/FaYHqJy77ror11xzTbq7u9Pe3p5DDz00X//613vuow0NDenq6srZZ5+dk046KZ2dnfnqV7+a+++/Px0dHZk8eXIuuOCC1NTUpKurK1/+8pfz7//+76mrq8vIkSPz5S9/Oc3NzdX+mLzDCWp4i6644oqdgjlJpkyZktNOOy2nnHLKq14/d+7czJo1K9OmTcvixYvT2tqaG2+8sVLjAlTcxo0bc8kll+S2225LS0tLuru785Of/KTn+Vfuo2vWrMnMmTNz9NFH5wc/+EH+67/+K7fddltqamryyU9+MnfeeWdOOumkrFy5Mo8//nhuv/32DBo0KF/5ylfyd3/3d5k3b171PiTElg/YrSZOnJiWlpZXHd+8eXPWrFmTqVOnJkmmTp2aNWvWZMuWLZUeEaBiNm3alEGDBqWpqSlJUlNTk/e///2vet373//+DBkyJD//+c/z5JNP5uijj059fX0GDRqU3//938+SJUt6Xrt9+/a0t7enq6sr27Zty6hRoyr1ceB1WaGGt+icc87p2fJRV1eXW2+99XVf29bWlubm5tTV1fW8fuTIkWlra8vw4cMrMi9Apb3vfe/L+PHjc9xxx+Woo47K4YcfnmnTpuXd7373Tq974IEH0t7env333z+HHnpobr311syaNStJsmLFijz//PNJfr3d7sEHH8wxxxyTPfbYIwcccEBaW1sr/rng/xLU8Ba91pYPAP6/2traXHXVVVm7dm0eeuihrFixItdee23PivMrCxNDhw7NokWLMmzYsHzkIx/J//zP/+RjH/tYhg4dmvHjx+eBBx5IkjzxxBN5+umn88Mf/jBDhgzJl770pXz1q18V1VSdoIYKaGlpyYYNG9LZ2Zm6urp0dnZm48aNr7k9BODtZsyYMRkzZkxOOeWU/PEf/3EefPDBJK+9MFFbW5vzzz8/559/fpLk29/+dg488MAkyW233ZZJkyZlr732SpKcfPLJmTNnTgU/Cbw2e6ihAkaMGJGxY8dm6dKlSZKlS5dm7NixtnsAb2sbNmzIo48+2vN4/fr12bJlS0aPHv2657S3t+dXv/pVkuS5557LTTfdlDPOOCNJMnr0
6DzwwAPp6OhIkvzbv/1bDj744D78BNA7VqjhLfrNPdRJctlll2Xx4sVZvnx5Nm3alDPOOCNNTU254447kiTz5s3L7Nmzc9VVV2XYsGGZP39+tUYHqIgdO3Zk0aJFefbZZ7PHHnukq6sr55133mv+YuIrfvWrX+XUU09Nbe2v1/w+97nP5dBDD02SnHLKKXnqqady8sknZ9CgQWlpackXv/jFinwWeCM13d3d3dUeAgAABipbPgAAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAoIa4B3o1ltvzcc+9rFqjwHwtiCoAQCggKAG6EeeeOKJTJ8+PRMmTMg555yT8847L5dffnl++ctf5hOf+EQmTZqUI488Mp/4xCeyfv36nvNuvfXWTJkyJRMmTMjkyZNz++23v+41nn766cydOzePPfZYJkyYkIkTJ+bxxx/PBz7wgXR2dva8bvny5Tn55JOTJIsWLeqZZ8KECfnwhz+cJ598sue1GzZsyF//9V9n0qRJmTx5cm688cY++OkA9E+CGqCf2L59ez796U/nwx/+cB588MFMnTo1K1asSJJ0dXXlIx/5SO65557cc889aWxszKWXXpokefHFF3PZZZfl29/+dh599NHcfPPNGTt27Ote58ADD8wll1ySww47LI8++mgefvjhjB8/Pk1NTbnvvvt6Xrd48eJMnz695/Hdd9+dE088sWe2T33qU+no6EhXV1fOPvvsHHLIIfnhD3+YG264ITfccEPuvffevvlBAfQzghqgn/jxj3+cHTt25LTTTkt9fX2OP/74/M7v/E6S5N3vfndOOOGE7Lnnnhk6dGjOPvvsPPTQQz3n1tbW5qmnnsrLL7+ckSNH5uCDD97l60+fPr1nZXvr1q257777MnXq1J7nDz300Jx44ompr6/PGWecke3bt+fHP/5xVq1alS1btuTTn/50Ghoast9+++XP/uzPcueddxb+RAAGhkHVHgCAX9u4cWOam5tTU1PTc6ylpSVJ8tJLL+UrX/lK7r333vzyl79Mkmzbti2dnZ0ZPHhwLr/88nznO9/JhRdemMMPPzxf+MIXcuCBB+7S9adNm5Y/+qM/yosvvpi77rorEydOzMiRI3ueHzVqVM+/a2tr09zcnI0bN/bMPnHixJ7nOzs7d3oM8HYmqAH6ib333jsbNmxId3d3T1S3tbVlv/32y3e+852sW7cu3/ve97L33nvnJz/5SaZPn57u7u4kybHHHptjjz02L7/8cv72b/82F198cf7pn/7pda/1m9H+iubm5kyYMCHLly/P4sWLX/UtIL+5Z7urqysbNmzIyJEjU1dXl9GjR2f58uW748cAMODY8gHQTxx22GGpq6vLP/7jP2bHjh1ZsWJFVq1aleTXq9GNjY0ZNmxYtm7dmm9+85s9523atCkrVqzIiy++mIaGhgwePDi1tW98ex8xYkQ2bNiQ7du373R82rRpufbaa7N27docf/zxOz33xBNPZPny5dmxY0duuOGGNDQ05Hd/93czfvz4DBkyJN/61rfy8ssvp7OzM2vXrs3jjz++m34yAP2boAboJxoaGrJo0aL88z//c4488sjcfvvtOe6449LQ0JDTTz897e3tmTRpUmbMmJFjjz2257yurq5cf/31OfbYY/N7v/d7eeihhzJv3rw3vNakSZNy0EEH5ZhjjslRRx3Vc/yDH/xgnn322Xzwgx/MnnvuudM5U6ZMyZ133pkjjzwyixcvzqJFi1JfX5+6urpcffXVefLJJzNlypRMmjQpF110UV544YXd+vMB6K9qul/5/0IA+p2PfvSjmTlzZv7kT/6kYtf8wz/8w1x66aX5wAc+0HNs0aJF+dnPfpaFCxdWbA6AgcIKNUA/8uCDD+Z///d/s2PHjtx222356U9/utNqdF9btmxZampqMmnSpIpdE2Cg80uJAP3IunXrct555+Wll17K6NGjc8UVV+z0TRu7orW1NUuWLHnV8Q996EM932H9
m0499dT853/+ZxYsWPCme7AB+P9s+QAAgAKWIAAAoICgBgCAAoIaAAAKCGoAACggqAEAoICgBgCAAv8PxhaweKG1nrEAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Bar height = mean speed per fuel type. The raw frame is passed so that ci=\"sd\" can draw the\n",
+ "# standard deviation; a frame pre-aggregated to one row per fuel has no spread to show.\n",
+ "barplot = sns.barplot(x=\"gas_type\", y=\"speed\", data=df, ci=\"sd\", palette=\"magma\");"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_dist = df.groupby(\"gas_type\").agg({\"distance\": \"mean\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ "
\n",
+ " \n",
+ " | gas_type | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | E10 | \n",
+ " 21.096250 | \n",
+ "
\n",
+ " \n",
+ " | SP98 | \n",
+ " 18.639912 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance\n",
+ "gas_type \n",
+ "E10 21.096250\n",
+ "SP98 18.639912"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_dist"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Relación entre la velocidad y el consumo para ambos combustibles:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.scatterplot(x=\"speed\", y=\"consume\", hue=\"gas_type\",data=df);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Estudio la correlación entre las diferentes variables:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "E10=df.loc[(df[\"gas_type\"]==\"E10\")]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Correlate numeric columns only: the text columns (specials, gas_type) cannot be correlated\n",
+ "# and make DataFrame.corr() raise on pandas >= 2.0\n",
+ "corrE10 = E10.select_dtypes(include=\"number\").corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | distance | \n",
+ " 1.000000 | \n",
+ " -0.172572 | \n",
+ " 0.633082 | \n",
+ " 0.137061 | \n",
+ " 0.158903 | \n",
+ " 0.045776 | \n",
+ " -0.073315 | \n",
+ " 0.027364 | \n",
+ "
\n",
+ " \n",
+ " | consume | \n",
+ " -0.172572 | \n",
+ " 1.000000 | \n",
+ " -0.233318 | \n",
+ " -0.040842 | \n",
+ " -0.322918 | \n",
+ " 0.043591 | \n",
+ " 0.248199 | \n",
+ " -0.160503 | \n",
+ "
\n",
+ " \n",
+ " | speed | \n",
+ " 0.633082 | \n",
+ " -0.233318 | \n",
+ " 1.000000 | \n",
+ " -0.014835 | \n",
+ " 0.091093 | \n",
+ " 0.125102 | \n",
+ " -0.001633 | \n",
+ " 0.128376 | \n",
+ "
\n",
+ " \n",
+ " | temp_inside | \n",
+ " 0.137061 | \n",
+ " -0.040842 | \n",
+ " -0.014835 | \n",
+ " 1.000000 | \n",
+ " 0.386506 | \n",
+ " 0.428083 | \n",
+ " 0.091396 | \n",
+ " 0.171009 | \n",
+ "
\n",
+ " \n",
+ " | temp_outside | \n",
+ " 0.158903 | \n",
+ " -0.322918 | \n",
+ " 0.091093 | \n",
+ " 0.386506 | \n",
+ " 1.000000 | \n",
+ " 0.048762 | \n",
+ " -0.097756 | \n",
+ " 0.236743 | \n",
+ "
\n",
+ " \n",
+ " | AC | \n",
+ " 0.045776 | \n",
+ " 0.043591 | \n",
+ " 0.125102 | \n",
+ " 0.428083 | \n",
+ " 0.048762 | \n",
+ " 1.000000 | \n",
+ " 0.336123 | \n",
+ " 0.171118 | \n",
+ "
\n",
+ " \n",
+ " | rain | \n",
+ " -0.073315 | \n",
+ " 0.248199 | \n",
+ " -0.001633 | \n",
+ " 0.091396 | \n",
+ " -0.097756 | \n",
+ " 0.336123 | \n",
+ " 1.000000 | \n",
+ " -0.094916 | \n",
+ "
\n",
+ " \n",
+ " | sun | \n",
+ " 0.027364 | \n",
+ " -0.160503 | \n",
+ " 0.128376 | \n",
+ " 0.171009 | \n",
+ " 0.236743 | \n",
+ " 0.171118 | \n",
+ " -0.094916 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside \\\n",
+ "distance 1.000000 -0.172572 0.633082 0.137061 0.158903 \n",
+ "consume -0.172572 1.000000 -0.233318 -0.040842 -0.322918 \n",
+ "speed 0.633082 -0.233318 1.000000 -0.014835 0.091093 \n",
+ "temp_inside 0.137061 -0.040842 -0.014835 1.000000 0.386506 \n",
+ "temp_outside 0.158903 -0.322918 0.091093 0.386506 1.000000 \n",
+ "AC 0.045776 0.043591 0.125102 0.428083 0.048762 \n",
+ "rain -0.073315 0.248199 -0.001633 0.091396 -0.097756 \n",
+ "sun 0.027364 -0.160503 0.128376 0.171009 0.236743 \n",
+ "\n",
+ " AC rain sun \n",
+ "distance 0.045776 -0.073315 0.027364 \n",
+ "consume 0.043591 0.248199 -0.160503 \n",
+ "speed 0.125102 -0.001633 0.128376 \n",
+ "temp_inside 0.428083 0.091396 0.171009 \n",
+ "temp_outside 0.048762 -0.097756 0.236743 \n",
+ "AC 1.000000 0.336123 0.171118 \n",
+ "rain 0.336123 1.000000 -0.094916 \n",
+ "sun 0.171118 -0.094916 1.000000 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "corrE10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Hide the upper triangle (diagonal included) so each pair of variables is shown only once.\n",
+ "# The mask is built from corrE10 itself, the matrix that is actually plotted.\n",
+ "mascara = np.triu(np.ones_like(corrE10, dtype=bool))\n",
+ "color_map = sns.diverging_palette(0, 10, as_cmap=True)\n",
+ "sns.heatmap(corrE10,\n",
+ "            mask = mascara,\n",
+ "            cmap=color_map,\n",
+ "            square=True,\n",
+ "            linewidth=0.5,\n",
+ "            vmax=1,\n",
+ "            cbar_kws={\"shrink\": .5},\n",
+ "            annot=True);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "SP98=df.loc[(df[\"gas_type\"]==\"SP98\")]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Correlate numeric columns only: the text columns (specials, gas_type) cannot be correlated\n",
+ "# and make DataFrame.corr() raise on pandas >= 2.0\n",
+ "corrSP98 = SP98.select_dtypes(include=\"number\").corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Hide the upper triangle (diagonal included) so each pair of variables is shown only once.\n",
+ "# The mask is built from corrSP98 itself, the matrix that is actually plotted.\n",
+ "mascara = np.triu(np.ones_like(corrSP98, dtype=bool))\n",
+ "sns.heatmap(corrSP98,\n",
+ "            mask = mascara,\n",
+ "            cmap=\"viridis\",\n",
+ "            square=True,\n",
+ "            linewidth=0.5,\n",
+ "            vmax=1,\n",
+ "            cbar_kws={\"shrink\": .5},\n",
+ "            annot=True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Mostramos la distribución de la variable consumo para cada tipo de carburante:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.violinplot(x=df.consume,y=df[\"gas_type\"]);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ironhack",
+ "language": "python",
+ "name": "ironhack"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/measurementsclean.csv b/measurementsclean.csv
new file mode 100644
index 0000000..714e34a
--- /dev/null
+++ b/measurementsclean.csv
@@ -0,0 +1,389 @@
+,distance,consume,speed,temp_inside,temp_outside,specials,gas_type,AC,rain,sun
+0,28.0,5.0,26,21.93491124260355,12,Unknown,E10,0,0,0
+1,12.0,4.2,30,21.93491124260355,13,Unknown,E10,0,0,0
+2,11.2,5.5,38,21.93491124260355,15,Unknown,E10,0,0,0
+3,12.9,3.9,36,21.93491124260355,14,Unknown,E10,0,0,0
+4,18.5,4.5,46,21.93491124260355,15,Unknown,E10,0,0,0
+5,8.3,6.4,50,21.93491124260355,10,Unknown,E10,0,0,0
+6,7.8,4.4,43,21.93491124260355,11,Unknown,E10,0,0,0
+7,12.3,5.0,40,21.93491124260355,6,Unknown,E10,0,0,0
+8,4.9,6.4,26,21.93491124260355,4,Unknown,E10,0,0,0
+9,11.9,5.3,30,21.93491124260355,9,Unknown,E10,0,0,0
+10,12.4,5.6,42,21.93491124260355,4,Unknown,E10,0,0,0
+11,11.8,4.6,38,21.93491124260355,0,Unknown,E10,0,0,0
+12,12.3,5.9,59,21.93491124260355,10,Unknown,E10,0,0,0
+13,24.7,5.1,58,21.93491124260355,12,Unknown,E10,0,0,0
+14,12.4,4.7,46,21.93491124260355,11,Unknown,E10,0,0,0
+15,17.3,5.1,24,21.93491124260355,5,Unknown,E10,0,0,0
+16,33.4,5.6,36,21.93491124260355,3,Unknown,E10,0,0,0
+17,11.8,5.1,32,21.93491124260355,3,Unknown,E10,0,0,0
+18,25.9,4.9,39,21.93491124260355,8,Unknown,E10,0,0,0
+19,11.8,4.7,40,21.93491124260355,4,Unknown,E10,0,0,0
+20,25.3,5.5,32,21.93491124260355,3,Unknown,E10,0,0,0
+21,14.2,5.9,38,21.93491124260355,1,Unknown,E10,0,0,0
+22,17.9,5.7,37,21.93491124260355,1,Unknown,E10,0,0,0
+23,11.8,4.7,36,21.93491124260355,1,Unknown,E10,0,0,0
+24,12.3,5.9,62,21.93491124260355,6,Unknown,E10,0,0,0
+25,12.4,4.1,57,21.93491124260355,9,Unknown,E10,0,0,0
+26,18.4,5.7,21,21.93491124260355,2,Unknown,E10,0,0,0
+27,18.4,5.8,28,21.93491124260355,3,Unknown,E10,0,0,0
+28,18.3,5.5,29,21.93491124260355,1,Unknown,E10,0,0,0
+29,18.4,5.7,35,21.93491124260355,4,Unknown,E10,0,0,0
+30,12.3,5.3,51,21.93491124260355,11,Unknown,E10,0,0,0
+31,11.8,5.0,29,21.93491124260355,10,Unknown,E10,0,0,0
+32,12.3,5.6,58,21.93491124260355,8,Unknown,E10,0,0,0
+33,32.6,4.8,40,21.93491124260355,7,Unknown,E10,0,0,0
+34,19.0,4.3,36,21.93491124260355,7,Unknown,E10,0,0,0
+35,12.1,5.7,36,21.93491124260355,8,Unknown,E10,0,0,0
+36,20.0,5.2,37,21.93491124260355,4,Unknown,E10,0,0,0
+37,4.9,7.4,26,21.93491124260355,6,Unknown,E10,0,0,0
+38,11.8,4.8,23,21.93491124260355,9,Unknown,E10,0,0,0
+39,12.3,6.5,58,21.93491124260355,7,Unknown,E10,0,0,0
+40,12.4,4.0,55,21.93491124260355,7,AC rain,E10,1,1,0
+41,4.5,5.0,29,21.93491124260355,7,AC,E10,1,0,0
+42,11.7,4.6,37,21.93491124260355,6,Unknown,E10,0,0,0
+43,10.2,4.3,44,21.93491124260355,7,Unknown,E10,0,0,0
+44,5.4,3.3,32,21.93491124260355,7,Unknown,SP98,0,0,0
+45,2.0,9.9,21,21.93491124260355,4,Unknown,SP98,0,0,0
+46,14.2,5.1,32,21.93491124260355,7,Unknown,SP98,0,0,0
+47,16.0,4.9,25,21.93491124260355,8,Unknown,SP98,0,0,0
+48,11.8,4.9,25,21.93491124260355,11,Unknown,SP98,0,0,0
+49,27.3,4.7,36,21.93491124260355,10,Unknown,SP98,0,0,0
+50,11.8,5.1,29,21.93491124260355,5,rain,SP98,0,1,0
+51,10.6,4.9,63,21.93491124260355,6,Unknown,SP98,0,0,0
+52,11.6,4.2,61,21.93491124260355,7,Unknown,SP98,0,0,0
+53,13.1,6.1,46,21.93491124260355,6,rain,SP98,0,1,0
+54,6.1,6.4,28,21.93491124260355,8,Unknown,SP98,0,0,0
+55,153.5,4.9,82,21.93491124260355,3,rain,SP98,0,1,0
+56,11.8,4.9,24,21.93491124260355,7,Unknown,SP98,0,0,0
+57,12.3,6.2,61,21.93491124260355,8,Unknown,SP98,0,0,0
+58,12.4,4.2,57,21.93491124260355,8,Unknown,SP98,0,0,0
+59,12.3,5.7,52,21.93491124260355,8,rain,SP98,0,1,0
+60,2.9,7.9,18,21.93491124260355,9,AC rain,SP98,1,1,0
+61,2.1,12.2,23,21.93491124260355,9,Unknown,SP98,1,1,0
+62,9.8,5.8,24,21.93491124260355,6,Unknown,SP98,0,0,0
+63,6.8,5.3,26,21.93491124260355,6,Unknown,SP98,0,0,0
+64,12.4,5.4,24,21.93491124260355,7,Unknown,SP98,0,0,0
+65,12.4,4.7,57,21.93491124260355,8,Unknown,SP98,0,0,0
+66,11.8,4.6,41,21.93491124260355,6,Unknown,SP98,0,0,0
+67,12.3,5.4,55,21.93491124260355,8,Unknown,SP98,0,0,0
+68,14.0,4.3,40,21.93491124260355,8,Unknown,SP98,0,0,0
+69,11.8,4.5,37,21.93491124260355,4,AC,SP98,1,0,0
+70,12.3,5.2,55,21.93491124260355,12,Unknown,SP98,0,0,0
+71,2.0,6.2,20,21.93491124260355,10,Unknown,SP98,0,0,0
+72,13.9,5.1,29,21.93491124260355,6,Unknown,SP98,0,0,0
+73,9.7,3.9,42,21.93491124260355,8,Unknown,SP98,0,0,0
+74,11.6,5.0,25,21.93491124260355,6,Unknown,SP98,0,0,0
+75,14.2,5.4,38,21.93491124260355,7,Unknown,SP98,0,0,0
+76,11.8,4.5,39,21.93491124260355,6,Unknown,SP98,0,0,0
+77,24.8,5.1,50,21.93491124260355,9,Unknown,SP98,0,0,0
+78,12.4,4.7,56,21.93491124260355,7,Unknown,SP98,0,0,0
+79,34.8,4.0,28,20.0,4,Unknown,SP98,0,0,0
+80,14.2,5.4,36,20.0,6,Unknown,SP98,0,0,0
+81,5.2,4.5,39,20.0,10,Unknown,SP98,0,0,0
+82,10.5,3.6,42,20.0,10,Unknown,SP98,0,0,0
+83,12.3,5.2,57,20.0,10,Unknown,SP98,0,0,0
+84,11.8,4.9,25,20.0,11,Unknown,SP98,0,0,0
+85,12.3,6.2,58,20.0,11,rain,SP98,0,1,0
+86,13.2,4.3,51,20.0,11,rain,SP98,0,1,0
+87,13.0,5.0,45,20.0,11,rain,SP98,0,1,0
+88,12.9,5.1,32,20.0,11,Unknown,SP98,0,0,0
+89,13.9,5.6,22,20.0,8,Unknown,SP98,0,0,0
+90,11.8,4.3,37,20.0,6,Unknown,SP98,0,0,0
+91,12.2,5.8,60,20.0,11,Unknown,SP98,0,0,0
+92,12.5,4.0,51,20.0,13,Unknown,SP98,0,0,0
+93,12.4,4.7,43,21.93491124260355,10,Unknown,SP98,0,0,0
+94,11.8,5.9,21,20.0,9,AC rain,SP98,1,1,0
+95,11.8,5.3,52,21.93491124260355,11,Unknown,SP98,0,0,0
+96,12.5,4.2,57,20.0,11,Unknown,SP98,0,0,0
+97,15.7,5.3,33,21.93491124260355,9,Unknown,SP98,0,0,0
+98,12.9,5.7,35,21.93491124260355,9,Unknown,SP98,0,0,0
+99,6.4,4.4,37,21.93491124260355,10,Unknown,SP98,0,0,0
+100,5.3,4.1,34,21.93491124260355,9,Unknown,SP98,0,0,0
+101,26.2,5.8,71,21.0,8,AC rain,SP98,1,1,0
+102,18.8,5.0,62,21.93491124260355,9,rain,SP98,0,1,0
+103,4.9,6.9,25,21.0,12,rain,SP98,0,1,0
+104,12.4,5.4,18,21.0,11,AC rain,SP98,1,1,0
+105,22.9,5.3,45,21.93491124260355,7,Unknown,SP98,0,0,0
+106,162.7,5.5,75,23.0,1,Unknown,SP98,0,0,0
+107,4.9,6.5,26,21.0,1,Unknown,SP98,0,0,0
+108,11.8,4.7,36,21.0,4,Unknown,SP98,0,0,0
+109,16.6,5.1,56,21.0,7,Unknown,SP98,0,0,0
+110,12.4,5.7,37,21.0,7,Unknown,SP98,0,0,0
+111,15.9,5.4,25,21.0,7,Unknown,SP98,0,0,0
+112,5.1,8.7,21,21.93491124260355,5,Unknown,SP98,0,0,0
+113,22.4,4.9,66,21.93491124260355,7,Unknown,SP98,0,0,0
+114,31.1,4.7,42,21.93491124260355,7,Unknown,SP98,0,0,0
+115,4.9,6.3,27,21.93491124260355,3,Unknown,SP98,0,0,0
+116,11.8,5.1,26,23.0,4,rain,SP98,0,1,0
+117,22.9,6.0,42,23.0,4,rain,SP98,0,1,0
+118,12.4,4.6,38,23.0,1,snow,SP98,0,1,0
+119,12.9,5.8,40,23.0,4,Unknown,SP98,0,0,0
+120,11.8,5.1,43,23.0,0,Unknown,SP98,0,0,0
+121,12.2,5.8,58,23.0,2,Unknown,SP98,0,0,0
+122,24.8,4.6,55,23.0,3,Unknown,SP98,0,0,0
+123,14.2,5.6,24,23.0,8,Unknown,SP98,0,0,0
+124,11.8,4.6,38,23.0,0,snow,SP98,0,1,0
+125,12.2,6.3,57,23.0,0,snow,SP98,0,1,0
+126,24.7,5.5,56,25.0,1,Unknown,SP98,0,0,0
+127,6.8,4.3,46,24.0,2,Unknown,SP98,0,0,0
+128,17.3,5.6,37,21.93491124260355,1,Unknown,SP98,0,0,0
+129,11.8,4.3,44,21.93491124260355,-3,Unknown,SP98,0,0,0
+130,15.9,5.7,46,21.93491124260355,5,Unknown,SP98,0,0,0
+131,5.1,6.4,39,21.93491124260355,4,Unknown,SP98,0,0,0
+132,16.1,4.5,33,21.93491124260355,6,Unknown,SP98,0,0,0
+133,11.8,4.5,43,21.93491124260355,3,Unknown,SP98,0,0,0
+134,4.2,6.0,26,21.93491124260355,5,Unknown,SP98,0,0,0
+135,17.4,5.1,30,21.93491124260355,5,Unknown,SP98,0,0,0
+136,23.5,6.0,25,21.93491124260355,5,rain,SP98,0,1,0
+137,11.8,4.5,38,21.93491124260355,5,rain,SP98,0,1,0
+138,12.3,6.1,61,21.93491124260355,10,rain,SP98,0,1,0
+139,16.1,5.4,24,21.93491124260355,7,rain,E10,0,1,0
+140,11.8,4.3,40,21.93491124260355,10,rain,E10,0,1,0
+141,12.3,5.4,58,21.93491124260355,13,Unknown,E10,0,0,0
+142,12.4,4.3,49,21.93491124260355,17,Unknown,E10,0,0,0
+143,7.0,5.2,25,21.93491124260355,17,Unknown,E10,0,0,0
+144,11.8,4.1,37,21.93491124260355,10,Unknown,E10,0,0,0
+145,20.1,4.4,41,21.93491124260355,18,Unknown,E10,0,0,0
+146,20.8,4.5,45,21.93491124260355,10,Unknown,E10,0,0,0
+147,1.7,10.8,14,21.93491124260355,10,rain,E10,0,1,0
+148,35.9,4.7,45,21.93491124260355,12,Unknown,E10,0,0,0
+149,36.9,4.8,52,21.93491124260355,5,Unknown,E10,0,0,0
+150,16.8,4.0,46,21.93491124260355,8,Unknown,E10,0,0,0
+151,9.9,5.0,28,21.93491124260355,9,Unknown,E10,0,0,0
+152,22.9,4.6,61,21.93491124260355,7,rain,E10,0,1,0
+153,17.3,5.0,61,21.93491124260355,6,AC rain,E10,1,1,0
+154,11.8,4.3,37,21.93491124260355,7,Unknown,E10,0,0,0
+155,36.6,5.2,80,21.93491124260355,7,rain,E10,0,1,0
+156,44.9,4.7,62,21.93491124260355,8,Unknown,E10,0,0,0
+157,11.8,4.2,34,21.93491124260355,9,rain,E10,0,1,0
+158,21.6,5.3,44,21.93491124260355,9,rain,E10,0,1,0
+159,39.4,5.3,60,21.93491124260355,9,rain,E10,0,1,0
+160,5.1,8.1,39,21.93491124260355,4,Unknown,E10,0,0,0
+161,26.6,4.8,38,21.93491124260355,7,Unknown,E10,0,0,0
+162,53.2,5.1,71,21.93491124260355,2,Unknown,E10,0,0,0
+163,18.9,4.4,53,21.93491124260355,2,Unknown,E10,0,0,0
+164,43.5,5.0,80,21.93491124260355,3,Unknown,E10,0,0,0
+165,6.1,6.3,26,21.93491124260355,5,Unknown,E10,0,0,0
+166,16.4,4.8,49,21.93491124260355,5,Unknown,E10,0,0,0
+167,12.3,6.1,40,21.93491124260355,6,Unknown,E10,0,0,0
+168,21.1,4.6,36,21.93491124260355,8,Unknown,E10,0,0,0
+169,21.1,4.8,43,21.93491124260355,7,Unknown,E10,0,0,0
+170,22.7,4.7,55,21.93491124260355,6,Unknown,E10,0,0,0
+171,44.4,4.8,38,21.93491124260355,8,Unknown,E10,0,0,0
+172,35.8,4.4,51,21.93491124260355,6,Unknown,E10,0,0,0
+173,11.8,4.9,44,21.93491124260355,0,Unknown,E10,0,0,0
+174,26.2,4.9,42,21.93491124260355,6,Unknown,E10,0,0,0
+175,40.6,4.4,44,21.0,3,Unknown,E10,0,0,0
+176,12.4,5.3,38,21.0,-5,Unknown,E10,0,0,0
+177,14.1,5.3,47,21.0,-3,Unknown,E10,0,0,0
+178,58.7,4.8,75,21.0,0,Unknown,E10,0,0,0
+179,16.2,5.2,29,21.0,0,Unknown,E10,0,0,0
+180,12.3,4.9,50,21.93491124260355,0,Unknown,E10,0,0,0
+181,12.3,7.1,52,21.93491124260355,0,AC snow,E10,1,1,0
+182,12.4,5.2,51,21.93491124260355,1,Unknown,E10,0,0,0
+183,31.8,4.7,59,21.93491124260355,3,Unknown,E10,0,0,0
+184,12.3,5.1,55,21.93491124260355,8,Unknown,E10,0,0,0
+185,51.6,5.0,73,21.93491124260355,12,Unknown,E10,0,0,0
+186,38.6,4.6,44,21.93491124260355,10,Unknown,E10,0,0,0
+187,12.3,4.8,41,21.93491124260355,7,Unknown,E10,0,0,0
+188,81.2,4.4,69,22.0,13,Unknown,E10,0,0,0
+189,130.3,4.6,85,22.0,12,Unknown,E10,0,0,0
+190,67.2,4.3,67,22.0,18,Unknown,E10,0,0,0
+191,43.7,4.7,44,22.0,9,half rain half sun,SP98,0,1,0
+192,12.1,4.2,43,22.0,4,Unknown,SP98,0,0,0
+193,56.1,4.8,82,22.0,13,Unknown,SP98,0,0,0
+194,39.0,4.1,61,22.0,16,Unknown,SP98,0,0,0
+195,11.8,4.5,41,21.93491124260355,13,Unknown,SP98,0,0,0
+196,38.5,4.8,63,21.93491124260355,14,Unknown,SP98,0,0,0
+197,28.2,4.6,54,21.93491124260355,14,Unknown,SP98,0,0,0
+198,2.9,7.4,24,21.93491124260355,14,Unknown,SP98,0,0,0
+199,6.1,5.6,24,21.93491124260355,13,Unknown,SP98,0,0,0
+200,19.6,4.9,43,21.93491124260355,13,Unknown,SP98,0,0,0
+201,22.2,3.8,42,21.93491124260355,15,Unknown,SP98,0,0,0
+202,13.6,4.5,44,19.0,18,Unknown,SP98,0,0,0
+203,12.6,4.1,33,21.93491124260355,17,Unknown,SP98,0,0,0
+204,8.7,5.3,28,21.93491124260355,12,AC rain,SP98,1,1,0
+205,7.9,4.7,31,21.93491124260355,12,AC,SP98,1,0,0
+206,2.4,9.0,26,20.0,10,Unknown,SP98,0,0,0
+207,4.9,6.3,26,20.0,10,Unknown,SP98,0,0,0
+208,18.1,3.6,36,20.0,19,Unknown,SP98,0,0,0
+209,25.9,3.7,39,20.0,21,Unknown,SP98,0,0,0
+210,1.3,11.5,21,20.0,10,Unknown,SP98,0,0,0
+211,14.1,5.0,22,20.0,12,Unknown,SP98,0,0,0
+212,13.4,5.5,31,20.0,9,Unknown,SP98,0,0,0
+213,6.4,4.7,33,20.0,8,Unknown,SP98,0,0,0
+214,12.9,4.5,42,20.0,13,Unknown,SP98,0,0,0
+215,12.1,4.4,33,21.93491124260355,5,Unknown,SP98,0,0,0
+216,15.7,4.1,32,21.93491124260355,13,Unknown,SP98,0,0,0
+217,16.2,4.4,26,21.93491124260355,11,Unknown,SP98,0,0,0
+218,12.8,4.6,22,21.93491124260355,12,Unknown,SP98,0,0,0
+219,19.0,4.4,58,21.93491124260355,17,sun,SP98,0,0,1
+220,29.0,4.0,27,21.93491124260355,12,Unknown,SP98,0,0,0
+221,12.1,5.0,32,21.93491124260355,9,Unknown,SP98,0,0,0
+222,12.3,5.2,55,21.93491124260355,10,Unknown,SP98,0,0,0
+223,24.8,4.0,56,21.93491124260355,11,Unknown,SP98,0,0,0
+224,12.9,5.1,34,21.93491124260355,8,rain,SP98,0,1,0
+225,11.8,4.5,39,21.93491124260355,3,Unknown,SP98,0,0,0
+226,31.4,4.6,62,21.93491124260355,11,Unknown,SP98,0,0,0
+227,19.0,5.1,53,21.93491124260355,4,rain,SP98,0,1,0
+228,13.0,5.7,38,21.93491124260355,3,AC rain,SP98,1,1,0
+229,11.8,4.8,42,21.93491124260355,2,Unknown,SP98,0,0,0
+230,13.0,6.2,32,21.93491124260355,4,Unknown,SP98,0,0,0
+231,11.8,5.0,43,21.93491124260355,1,Unknown,SP98,0,0,0
+232,27.1,5.0,69,21.93491124260355,8,Unknown,SP98,0,0,0
+233,5.2,4.6,38,21.93491124260355,8,Unknown,SP98,0,0,0
+234,19.0,4.5,29,21.93491124260355,10,Unknown,E10,0,0,0
+235,12.4,4.8,38,21.93491124260355,1,Unknown,E10,0,0,0
+236,25.2,5.0,55,21.93491124260355,9,Unknown,E10,0,0,0
+237,14.3,4.8,36,21.93491124260355,10,Unknown,E10,0,0,0
+238,11.8,4.6,40,21.93491124260355,2,Unknown,E10,0,0,0
+239,16.9,4.5,48,21.93491124260355,9,sun,E10,0,0,1
+240,12.4,4.6,55,21.93491124260355,11,sun,E10,0,0,1
+241,17.4,4.4,36,21.93491124260355,12,sun,E10,0,0,1
+242,9.2,5.7,33,21.93491124260355,8,rain,E10,0,1,0
+243,12.3,5.8,54,21.93491124260355,10,rain,E10,0,1,0
+244,13.0,5.9,32,21.93491124260355,10,Unknown,E10,0,0,0
+245,11.8,6.1,16,21.93491124260355,6,rain,E10,0,1,0
+246,13.0,5.7,37,21.93491124260355,11,rain,E10,0,1,0
+247,12.3,5.0,42,21.93491124260355,10,Unknown,E10,0,0,0
+248,12.3,5.2,57,21.93491124260355,15,sun,E10,0,0,1
+249,12.5,4.3,57,21.93491124260355,16,sun,E10,0,0,1
+250,31.5,4.1,30,21.93491124260355,16,sun,E10,0,0,1
+251,11.8,4.4,42,21.93491124260355,8,Unknown,E10,0,0,0
+252,24.9,4.5,53,21.93491124260355,14,Unknown,E10,0,0,0
+253,17.0,3.9,46,21.93491124260355,14,sun,E10,0,0,1
+254,2.0,8.1,20,21.93491124260355,14,Unknown,E10,0,0,0
+255,11.8,4.4,33,21.93491124260355,8,Unknown,E10,0,0,0
+256,7.4,5.0,31,21.93491124260355,12,Unknown,E10,0,0,0
+257,12.4,4.7,55,21.93491124260355,14,sun,E10,0,0,1
+258,2.0,6.0,22,21.93491124260355,14,Unknown,E10,0,0,0
+259,14.0,5.0,41,21.93491124260355,8,Unknown,E10,0,0,0
+260,25.7,5.0,45,21.93491124260355,7,Unknown,E10,0,0,0
+261,24.5,3.9,50,21.93491124260355,15,sun,E10,0,0,1
+262,11.8,4.5,28,21.93491124260355,12,Unknown,E10,0,0,0
+263,4.1,5.4,24,21.93491124260355,13,Unknown,E10,0,0,0
+264,4.2,5.6,29,22.0,17,Unknown,E10,0,0,0
+265,4.2,3.9,29,22.0,18,sun,E10,0,0,1
+266,16.0,4.0,40,22.0,10,Unknown,E10,0,0,0
+267,22.9,4.0,29,21.93491124260355,21,Unknown,E10,0,0,0
+268,16.0,3.8,42,21.93491124260355,8,Unknown,E10,0,0,0
+269,15.4,4.5,50,22.0,14,Unknown,E10,0,0,0
+270,16.0,3.8,41,22.0,12,Unknown,E10,0,0,0
+271,4.2,5.6,32,22.0,18,Unknown,E10,0,0,0
+272,101.9,5.2,75,22.0,18,Unknown,E10,0,0,0
+273,93.9,4.8,88,23.0,18,AC sun,E10,1,0,1
+274,25.7,4.9,50,22.0,10,rain,SP98,0,1,0
+275,16.0,4.1,40,22.0,10,Unknown,SP98,0,0,0
+276,16.1,4.5,32,22.0,19,Unknown,SP98,0,0,0
+277,16.0,4.4,40,22.0,7,AC rain,SP98,1,1,0
+278,16.0,4.5,41,22.0,11,Unknown,SP98,0,0,0
+279,24.7,4.5,26,22.0,10,Unknown,SP98,0,0,0
+280,16.0,3.9,42,22.0,8,Unknown,SP98,0,0,0
+281,15.4,4.6,43,22.0,16,Unknown,SP98,0,0,0
+282,16.0,3.8,40,22.0,8,Unknown,SP98,0,0,0
+283,32.1,4.5,50,22.0,16,Unknown,SP98,0,0,0
+284,25.9,4.4,40,22.0,14,Unknown,SP98,0,0,0
+285,48.6,4.3,44,22.0,12,Unknown,SP98,0,0,0
+286,37.2,4.0,45,22.0,20,sun,SP98,0,0,1
+287,28.8,3.9,35,22.0,15,sun,SP98,0,0,1
+288,6.7,5.0,30,22.0,17,Unknown,SP98,0,0,0
+289,7.4,4.1,25,22.0,18,sun,SP98,0,0,1
+290,17.3,4.1,22,22.0,25,sun,SP98,0,0,1
+291,6.6,5.6,43,22.0,16,Unknown,SP98,0,0,0
+292,14.3,4.1,26,22.0,20,Unknown,SP98,0,0,0
+293,13.3,4.6,33,22.0,18,Unknown,SP98,0,0,0
+294,8.3,4.9,26,22.0,23,Unknown,SP98,0,0,0
+295,12.7,4.5,39,22.0,27,sun,SP98,0,0,1
+296,16.5,4.1,47,22.0,14,Unknown,SP98,0,0,0
+297,20.6,4.1,38,22.0,21,Unknown,SP98,0,0,0
+298,16.3,4.5,58,22.0,16,Unknown,SP98,0,0,0
+299,18.7,4.2,65,25.0,18,sun ac,SP98,1,0,1
+300,36.5,3.9,54,23.0,18,sun,SP98,0,0,1
+301,19.0,5.0,35,22.0,15,sun ac,SP98,1,0,1
+302,16.6,4.4,46,22.0,5,ac,SP98,1,0,0
+303,29.9,4.5,32,22.0,18,ac,SP98,1,0,0
+304,16.0,3.8,42,22.0,11,Unknown,SP98,0,0,0
+305,21.1,5.1,33,22.0,10,rain,SP98,0,1,0
+306,16.0,3.9,40,22.0,10,Unknown,SP98,0,0,0
+307,11.9,5.3,34,22.0,13,Unknown,SP98,0,0,0
+308,10.1,4.2,35,22.0,16,Unknown,SP98,0,0,0
+309,31.9,4.3,33,22.0,16,Unknown,SP98,0,0,0
+310,18.7,4.0,60,22.0,13,Unknown,SP98,0,0,0
+311,10.8,4.7,48,22.0,17,sun,SP98,0,0,1
+312,19.8,4.0,56,22.0,17,sun,SP98,0,0,1
+313,11.3,4.3,38,22.0,17,Unknown,SP98,0,0,0
+314,11.5,5.3,53,22.0,15,Unknown,SP98,0,0,0
+315,21.4,5.2,51,22.0,13,Unknown,SP98,0,0,0
+316,32.0,4.9,53,22.0,15,Unknown,SP98,0,0,0
+317,41.9,4.7,53,22.0,14,Unknown,SP98,0,0,0
+318,211.0,4.6,80,22.0,20,Unknown,SP98,0,0,0
+319,216.1,5.3,90,22.0,21,sun,SP98,0,0,1
+320,25.5,3.6,27,22.0,27,sun,SP98,0,0,1
+321,16.6,4.2,52,22.0,15,rain,SP98,0,1,0
+322,25.9,4.1,43,22.0,24,Unknown,SP98,0,0,0
+323,16.5,4.0,48,22.0,15,Unknown,SP98,0,0,0
+324,22.1,3.9,29,22.0,26,Unknown,SP98,0,0,0
+325,16.6,3.7,49,22.0,17,Unknown,E10,0,0,0
+326,15.4,4.5,43,22.0,24,Unknown,E10,0,0,0
+327,16.5,3.9,43,22.0,17,Unknown,E10,0,0,0
+328,15.4,5.1,21,22.0,24,Unknown,E10,0,0,0
+329,18.4,4.2,59,22.0,24,Unknown,E10,0,0,0
+330,39.5,4.2,58,22.0,25,Unknown,E10,0,0,0
+331,21.5,4.8,47,25.0,27,AC Sun,E10,1,0,1
+332,4.8,4.7,26,25.0,18,Unknown,E10,0,0,0
+333,100.9,4.7,87,21.93491124260355,22,Unknown,E10,0,0,0
+334,129.7,4.6,58,24.0,21,ac,E10,0,0,0
+335,16.6,3.8,43,21.93491124260355,16,Unknown,E10,0,0,0
+336,15.4,4.2,50,21.93491124260355,21,Unknown,E10,0,0,0
+337,16.0,3.7,45,22.0,17,Unknown,E10,0,0,0
+338,16.0,4.2,41,22.0,15,Unknown,E10,0,0,0
+339,15.4,4.1,45,22.0,24,Unknown,E10,0,0,0
+340,16.0,4.0,43,22.0,16,Unknown,E10,0,0,0
+341,21.0,3.8,37,22.0,21,Unknown,E10,0,0,0
+342,15.4,3.8,47,22.0,24,Unknown,E10,0,0,0
+343,16.0,3.8,42,22.0,17,Unknown,E10,0,0,0
+344,16.1,5.1,30,25.0,16,ac rain,E10,1,1,0
+345,16.0,4.0,42,22.0,15,Unknown,E10,0,0,0
+346,15.4,4.8,40,22.0,18,Unknown,E10,0,0,0
+347,17.2,3.9,35,22.0,16,Unknown,E10,0,0,0
+348,16.6,4.6,50,22.0,25,Unknown,E10,0,0,0
+349,18.3,4.3,46,22.0,16,Unknown,SP98,0,0,0
+350,16.1,4.8,36,22.0,18,rain,SP98,0,1,0
+351,16.0,4.1,46,22.0,17,Unknown,SP98,0,0,0
+352,16.3,4.0,35,22.0,23,Unknown,SP98,0,0,0
+353,16.0,3.7,44,22.0,16,Unknown,SP98,0,0,0
+354,18.3,4.3,46,22.0,23,Unknown,SP98,0,0,0
+355,4.5,5.2,23,22.0,23,sun,SP98,0,0,1
+356,17.0,4.2,48,22.0,16,Unknown,SP98,0,0,0
+357,6.1,5.0,35,21.93491124260355,18,Unknown,SP98,0,0,0
+358,6.1,5.0,33,22.0,24,sun,SP98,0,0,1
+359,17.3,4.2,36,22.0,22,Unknown,SP98,0,0,0
+360,6.0,4.8,27,22.0,17,Unknown,SP98,0,0,0
+361,16.0,4.1,45,22.0,15,Unknown,SP98,0,0,0
+362,15.4,4.9,55,22.0,18,Unknown,SP98,0,0,0
+363,15.4,4.3,39,22.0,23,Unknown,SP98,0,0,0
+364,16.0,3.9,35,22.0,18,Unknown,SP98,0,0,0
+365,15.4,4.5,52,22.0,26,Unknown,SP98,0,0,0
+366,16.6,4.1,48,21.93491124260355,30,sun ac,SP98,1,0,1
+367,15.4,5.0,48,21.93491124260355,31,ac,SP98,1,0,0
+368,16.0,4.0,43,22.0,21,ac,SP98,1,0,0
+369,15.4,4.5,42,22.0,30,Unknown,SP98,0,0,0
+370,16.0,3.6,43,22.0,19,Unknown,SP98,0,0,0
+371,20.9,4.1,33,22.0,21,Unknown,SP98,0,0,0
+372,32.1,3.9,47,22.0,26,Unknown,SP98,0,0,0
+373,15.3,6.6,16,24.0,27,ac,SP98,1,0,0
+374,28.5,4.0,44,25.0,23,ac,SP98,1,0,0
+375,11.9,3.7,33,25.0,19,Unknown,SP98,0,0,0
+376,14.5,4.3,36,25.0,22,Unknown,SP98,0,0,0
+377,16.6,4.1,49,22.0,26,sun,SP98,0,0,1
+378,16.2,4.4,39,21.93491124260355,17,ac,SP98,1,0,0
+379,16.7,3.8,44,21.93491124260355,23,sun,SP98,0,0,1
+380,8.3,5.0,52,21.93491124260355,27,Unknown,SP98,0,0,0
+381,5.5,3.7,33,21.93491124260355,28,sun,SP98,0,0,1
+382,13.6,3.7,33,21.93491124260355,28,sun,SP98,0,0,1
+383,16.0,3.7,39,21.93491124260355,18,Unknown,SP98,0,0,0
+384,16.1,4.3,38,25.0,31,AC,SP98,1,0,0
+385,16.0,3.8,45,25.0,19,Unknown,SP98,0,0,0
+386,15.4,4.6,42,25.0,31,AC,SP98,1,0,0
+387,14.7,5.0,25,25.0,30,AC,SP98,1,0,0