diff --git a/Code/0.Exploration.ipynb b/Code/0.Exploration.ipynb new file mode 100644 index 0000000..58f39f3 --- /dev/null +++ b/Code/0.Exploration.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# Opening the csv file into a dataframe,\n", + "df = pd.read_csv('../Data/measurements.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 388 entries, 0 to 387\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 distance 388 non-null object\n", + " 1 consume 388 non-null object\n", + " 2 speed 388 non-null int64 \n", + " 3 temp_inside 376 non-null object\n", + " 4 temp_outside 388 non-null int64 \n", + " 5 specials 93 non-null object\n", + " 6 gas_type 388 non-null object\n", + " 7 AC 388 non-null int64 \n", + " 8 rain 388 non-null int64 \n", + " 9 sun 388 non-null int64 \n", + " 10 refill liters 13 non-null object\n", + " 11 refill gas 13 non-null object\n", + "dtypes: int64(5), object(7)\n", + "memory usage: 36.5+ KB\n" + ] + } + ], + "source": [ + "# Now we can get some info about the dataframe, see what we are dealing with,\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['E10', nan, 'SP98'], dtype=object)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let's, now, check the columns with nulls in them,\n", + "# df.specials.unique()\n", + "#df['refill liters'].unique()\n", + "df['refill gas'].unique()" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Coments on the dataframe:\n", + "First, the dataframe is preaty clean, there aren't nulls in most columns. \n", + "\n", + "The first decision I am taking is to drop most columns with nulls in them. I consider it justified as the information in specials can be gathered using the other columns, so it doesn't add any information to the study. The only special that I haven't seen in other columns is snow, so we might want to add a new column in order to not lose this information. And the other 2 columns with nulls are refill liters and refill gas, if they were important to the study they are just present in 13 rows which is an irrelevant subset. \n", + "\n", + "Lastly we also find nulls in temp_outside. In this case I am going to keep the column as the difference between the temperature inside an outside might be relevant. \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "df['snow'] = df.specials.apply(lambda x: True if type(x) != float and 'snow' in x else False)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's do some dropping,\n", + "cols_2drop = ['specials', 'refill liters','refill gas']\n", + "df.drop(columns=cols_2drop, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speedtemp_outsideACrainsun
count388.000000388.000000388.000000388.000000388.000000
mean41.92783511.3582470.0773200.1237110.082474
std13.5985246.9915420.2674430.3296770.275441
min14.000000-5.0000000.0000000.0000000.000000
25%32.7500007.0000000.0000000.0000000.000000
50%40.50000010.0000000.0000000.0000000.000000
75%50.00000016.0000000.0000000.0000000.000000
max90.00000031.0000001.0000001.0000001.000000
\n", + "
" + ], + "text/plain": [ + " speed temp_outside AC rain sun\n", + "count 388.000000 388.000000 388.000000 388.000000 388.000000\n", + "mean 41.927835 11.358247 0.077320 0.123711 0.082474\n", + "std 13.598524 6.991542 0.267443 0.329677 0.275441\n", + "min 14.000000 -5.000000 0.000000 0.000000 0.000000\n", + "25% 32.750000 7.000000 0.000000 0.000000 0.000000\n", + "50% 40.500000 10.000000 0.000000 0.000000 0.000000\n", + "75% 50.000000 16.000000 0.000000 0.000000 0.000000\n", + "max 90.000000 31.000000 1.000000 1.000000 1.000000" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's now dig a little bit into the data and the numbers and stadistics,\n", + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "# Here we see that we are missing a column, that is probably due to the format in which pandas is getting the info from that column,\n", + "# let's turn it into float numbers,\n", + "df.temp_inside = df.temp_inside.apply(lambda x: float(x.replace(',','.')) if type(x) != float else x)\n", + "# The same thing happens with some other columns, such as consume or distance,\n", + "df.consume = df.consume.apply(lambda x: float(x.replace(',','.')))\n", + "df.distance = df.distance.apply(lambda x: float(x.replace(',','.')))\n", + "# It is also obvious that sun, rain and AC should be booleans (0 or 1),\n", + "df.rain = df.rain.apply(lambda x: bool(x))\n", + "df.sun = df.sun.apply(lambda x: bool(x))\n", + "df.AC = df.AC.apply(lambda x: bool(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outside
count388.000000388.000000388.000000376.000000388.000000
mean19.6528354.91237141.92783521.92952111.358247
std22.6678371.03317213.5985241.0104556.991542
min1.3000003.30000014.00000019.000000-5.000000
25%11.8000004.30000032.75000021.5000007.000000
50%14.6000004.70000040.50000022.00000010.000000
75%19.0000005.30000050.00000022.50000016.000000
max216.10000012.20000090.00000025.50000031.000000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside\n", + "count 388.000000 388.000000 388.000000 376.000000 388.000000\n", + "mean 19.652835 4.912371 41.927835 21.929521 11.358247\n", + "std 22.667837 1.033172 13.598524 1.010455 6.991542\n", + "min 1.300000 3.300000 14.000000 19.000000 -5.000000\n", + "25% 11.800000 4.300000 32.750000 21.500000 7.000000\n", + "50% 14.600000 4.700000 40.500000 22.000000 10.000000\n", + "75% 19.000000 5.300000 50.000000 22.500000 16.000000\n", + "max 216.100000 12.200000 90.000000 25.500000 31.000000" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let's check if we solved the problem,\n", + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATg0lEQVR4nO3dfYxld13H8feXLg+FgS64MK67q9NgRbEr2N5UDAZnqGKhhK2KTUmDu1KzEopWXQILGEtCGoukEvABstqGJTYdai1pbanSVMZKwlZ2a2H6BKx1C52UXZF2YaARF77+cc/IZXae7rkzc+758X4lm7n3d54+c3v6mTNnzrk3MhNJUlme1HQASdLqs9wlqUCWuyQVyHKXpAJZ7pJUoA1NBwDYtGlTjo2N1Vr2m9/8Js94xjNWN9A6MXszzN6MtmYf5tyHDh36amY+d6FpQ1HuY2NjHDx4sNayU1NTjI+Pr26gdWL2Zpi9GW3NPsy5I+LhxaZ5WkaSCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgo0FHeoqj3G9t4KwJ7tJ9hVPV4PR648f922JZXAI3dJKpDlLkkFstwlqUCWuyQVaNlyj4hrIuJYRNy7wLQ9EZERsal6HhHxgYg4HBGfi4iz1iK0JGlpKzly/zBw3vzBiNgGvAL4Us/wK4Ezqn+7gQ8OHlGS1K9lyz0z7wS+tsCk9wFvBbJnbAfwkew6AGyMiM2rklSStGKRmcvPFDEG3JKZZ1bPdwAvz8zLIuII0MnMr0bELcCVmfmpar47gLdl5kkfsxQRu+ke3TM6Onr25ORkrW9gdnaWkZGRWss2rY3Zp2eOAzB6Khx9Yv22u33Laau2rja+7nPMvv6GOffExMShzOwsNK3vm5gi4unAO+iekqktM/cB+wA6nU7W/RirYf4IrOW0MfuunpuYrppev3vgjlw8vmrrauPrPsfs66+tuev83/l84HTgsxEBsBW4OyLOA
WaAbT3zbq3GJEnrqO9LITNzOjOfl5ljmTkGPAKclZlfAW4GfrO6auYlwPHMfHR1I0uSlrOSSyGvAz4NvCAiHomIS5aY/ePAQ8Bh4K+BN61KSklSX5Y9LZOZr1tm+ljP4wQuHTyWJGkQ3qEqSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFWglH5B9TUQci4h7e8beGxEPRsTnIuJjEbGxZ9rbI+JwRHw+In5ljXJLkpawkiP3DwPnzRu7HTgzM38G+ALwdoCIeCFwEfDT1TJ/FRGnrFpaSdKKLFvumXkn8LV5Y5/IzBPV0wPA1urxDmAyM/8nM/8TOAycs4p5JUkrsBrn3N8A3FY93gJ8uWfaI9WYJGkdRWYuP1PEGHBLZp45b/ydQAf4tczMiPgL4EBm/m01/Wrgtsy8YYF17gZ2A4yOjp49OTlZ6xuYnZ1lZGSk1rJNa2P26ZnjAIyeCkefWL/tbt9y2qqtq42v+xyzr79hzj0xMXEoMzsLTdtQd6URsQt4NXBufu8nxAywrWe2rdXYSTJzH7APoNPp5Pj4eK0cU1NT1F22aW3MvmvvrQDs2X6Cq6Zr7z59O3Lx+Kqtq42v+xyzr7+25q51WiYizgPeCrwmM7/VM+lm4KKIeGpEnA6cAfzb4DElSf1Y9tArIq4DxoFNEfEIcDndq2OeCtweEdA9FfPGzLwvIq4H7gdOAJdm5nfWKrwkaWHLlntmvm6B4auXmP8K4IpBQkmSBuMdqpJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKtCyH5AdEdcArwaOZeaZ1dhzgI8CY8AR4MLMfCwiAng/8CrgW8CuzLx7baLrB8nY3ltXbV17tp9gVx/rO3Ll+au2bWm9rOTI/cPAefPG9gJ3ZOYZwB3Vc4BXAmdU/3YDH1ydmJKkfixb7pl5J/C1ecM7gP3V4/3ABT3jH8muA8DGiNi8SlklSSsUmbn8TBFjwC09p2Uez8yN1eMAHsvMjRFxC3BlZn6qmnYH8LbMPLjAOnfTPbpndHT07MnJyVrfwOzsLCMjI7WWbVobs0/PHAdg9FQ4+kTDYWrqN/v2LaetXZg+tXGfmdPW7MOce2Ji4lBmdhaatuw59+VkZkbE8j8hTl5uH7APoNPp5Pj4eK3tT01NUXfZprUx+9y56j3bT3DV9MC7TyP6zX7k4vG1C9OnNu4zc9qava25614tc3TudEv19Vg1PgNs65lvazUmSVpHdcv9ZmBn9XgncFPP+G9G10uA45n56IAZJUl9WsmlkNcB48CmiHgEuBy4Erg+Ii4BHgYurGb/ON3LIA/TvRTyt9YgsyRpGcuWe2a+bpFJ5y4wbwKXDhpKkjQY71CVpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCjRQuUfEH0TEfRFxb0RcFxFPi4jTI+KuiDgcER+NiKesVlhJ0srULveI2AL8HtDJzDOBU4CLgPcA78vMHwceAy5ZjaCSpJUb9LTMBuDUiNgAPB14FHg5cEM1fT9wwYDbkCT1KTKz/sIRlwFXAE8AnwAuAw5UR+1ExDbgturIfv6yu4HdAKOjo2dPTk7WyjA7O8vIyEi9b6Bhbcw+PXMcgNFT4egTDYepqd/s27ectnZh+tTGfWZOW7MPc+6JiYlDmdlZaNqGuiuNiGcDO4DTgceBvwPOW+nymbkP2AfQ6XRyfHy8Vo6pqSnqLtu0NmbftfdWAPZsP8FV07V3n0b1m/3IxeNrF6ZPbdxn5rQ1e1tzD
3Ja5peA/8zM/8rM/wVuBF4KbKxO0wBsBWYGzChJ6tMg5f4l4CUR8fSICOBc4H7gk8Brq3l2AjcNFlGS1K/a5Z6Zd9H9w+ndwHS1rn3A24A/jIjDwA8BV69CTklSHwY6aZqZlwOXzxt+CDhnkPVKkgbjHaqSVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSrQQOUeERsj4oaIeDAiHoiIn4+I50TE7RHxxerrs1crrCRpZQY9cn8/8I+Z+ZPAi4AHgL3AHZl5BnBH9VyStI5ql3tEnAa8DLgaIDO/nZmPAzuA/dVs+4ELBosoSepXZGa9BSNeDOwD7qd71H4IuAyYycyN1TwBPDb3fN7yu4HdAKOjo2dPTk7WyjE7O8vIyEitZZvWxuzTM8cBGD0Vjj7RcJia+s2+fctpaxemT23cZ+a0Nfsw556YmDiUmZ2Fpg1S7h3gAPDSzLwrIt4PfB343d4yj4jHMnPJ8+6dTicPHjxYK8fU1BTj4+O1lm1aG7OP7b0VgD3bT3DV9IaG09TTluxHrjz/pLE27jNz2pp9mHNHxKLlPsg590eARzLzrur5DcBZwNGI2FxteDNwbIBtSJJqqF3umfkV4MsR8YJq6Fy6p2huBnZWYzuBmwZKKEnq26C/m/4ucG1EPAV4CPgtuj8wro+IS4CHgQsH3IYkqU8DlXtm3gMsdL7n3EHWK0kajHeoSlKBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQUauNwj4pSI+PeIuKV6fnpE3BURhyPio9WHZ0uS1tFqHLlfBjzQ8/w9wPsy88eBx4BLVmEbkqQ+DFTuEbEVOB/4m+p5AC8Hbqhm2Q9cMMg2JEn9i8ysv3DEDcCfAM8E3gLsAg5UR+1ExDbgtsw8c4FldwO7AUZHR8+enJyslWF2dpaRkZFayzatjdmnZ44DMHoqHH2i4TA1tSX79i2nnTTWxn1mTluzD3PuiYmJQ5nZWWjahrorjYhXA8cy81BEjPe7fGbuA/YBdDqdHB/vexUATE1NUXfZprUx+669twKwZ/sJrpquvfs0qi3Zj1w8ftJYG/eZOW3N3tbcg+zhLwVeExGvAp4GPAt4P7AxIjZk5glgKzAzeExJUj9qn3PPzLdn5tbMHAMuAv45My8GPgm8tpptJ3DTwCklSX1Zi+vc3wb8YUQcBn4IuHoNtiFJWsKqnHjMzClgqnr8EHDOaqxXklSPd6hKUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUoOH/IMkhNlZ9nmhde7af+P/PJO3HkSvPH2i7ksrnkbskFchyl6QCWe6SVKDa5R4R2yLikxFxf0TcFxGXVePPiYjbI+KL1ddnr15cSdJKDHLkfgLYk5kvBF4CXBoRLwT2Andk5hnAHdVzSdI6ql3umfloZt5dPf4G8ACwBdgB7K9m2w9cMGBGSVKfIjMHX0nEGHAncCbwpczcWI0H8Njc83nL7AZ2A4yOjp49OTlZa9uzs7OMjIzUWnZQ0zPHB1p+9FQ4+kT/y23fctpA2x3E3PdcN/swaEv2hf47N7m/D6qt2Yc598TExKHM7Cw0beByj4gR4F+AKzLzxoh4vLfMI+KxzFzyvHun08mDBw/W2v7U1BTj4+O1lh3UalznftV0/7caNHmd+9z3XDf7MGhL9oX+Oze5vw+qrdmHOXdELFruA10tExFPBv4euDYzb6yGj0bE5mr6ZuDYINuQJPVvkKtlArgaeCAz/6xn0s3AzurxTuCm+vEkSXUM8rvpS4HXA9MRcU819g7gSuD6i
LgEeBi4cKCEkqS+1S73zPwUEItMPrfueiVJg/MOVUkqkOUuSQUa/uvBpB9QC11qW/dtovvhW0qXwXKX9ANvqXtW1voH6lr9MPW0jCQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFcg7VCUNjUE/3Uzf45G7JBXIcpekAlnuklQgz7m3kOclJS3HI3dJKpBH7pK+z1r9ZrgeHzSi71mzI/eIOC8iPh8RhyNi71ptR5J0sjU5co+IU4C/BH4ZeAT4TETcnJn3r/a2pmeOezQgSfOs1ZH7OcDhzHwoM78NTAI71mhbkqR5IjNXf6URrwXOy8zfrp6/Hvi5zHxzzzy7gd3V0xcAn6+5uU3AVweI2ySzN8PszWhr9mHO/WOZ+dyFJjT2B9XM3AfsG3Q9EXEwMzurEGndmb0ZZm9GW7O3NfdanZaZAbb1PN9ajUmS1sFalftngDMi4vSIeApwEXDzGm1LkjTPmpyWycwTEfFm4J+AU4BrMvO+tdgWq3Bqp0Fmb4bZm9HW7K3MvSZ/UJUkNcu3H5CkAlnuklSgVpV7RFwTEcci4t6esRdFxKcjYjoi/iEintVkxsVExLaI+GRE3B8R90XEZdX4cyLi9oj4YvX12U1n7bVE7t+onn83IobyMrElsr83Ih6MiM9FxMciYmPDUU+yRPZ3V7nviYhPRMSPNJ11vsWy90zfExEZEZuayriYJV73d0XETPW63xMRr2o667IyszX/gJcBZwH39ox9BvjF6vEbgHc3nXOR7JuBs6rHzwS+ALwQ+FNgbzW+F3hP01lXmPun6N58NgV0ms7ZZ/ZXABuq8fcM22u+TPZn9czze8CHms660uzV8210L7R4GNjUdNY+Xvd3AW9pOl8//1p15J6ZdwJfmzf8E8Cd1ePbgV9f11ArlJmPZubd1eNvAA8AW+i+LcP+arb9wAWNBFzEYrkz84HMrHtX8bpYIvsnMvNENdsBuvdhDJUlsn+9Z7ZnAEN3RcQS+zrA+4C3MoS5YdnsrdKqcl/EfXzvfWt+g++/eWooRcQY8LPAXcBoZj5aTfoKMNpUruXMy90qS2R/A3Dbugfqw/zsEXFFRHwZuBj44wajLas3e0TsAGYy87PNplqZBfaZN1enxK4ZttOnCymh3N8AvCkiDtH9NerbDedZUkSMAH8P/P68ozCy+7vgUB7RLJV72C2WPSLeCZwArm0q23IWyp6Z78zMbXRzv3mp5ZvUm53u6/wOhvyH0ZwFXvcPAs8HXgw8ClzVXLqVaX25Z+aDmfmKzDwbuA74j6YzLSYinkx3h7k2M2+sho9GxOZq+mbgWFP5FrNI7lZYLHtE7AJeDVxc/VAdOit43a9lSE9DLpD9+cDpwGcj4gjdU2F3R8QPN5dyYQu97pl5NDO/k5nfBf6a7jvfDrXWl3tEPK/6+iTgj4APNZtoYRERwNXAA5n5Zz2TbgZ2Vo93Ajetd7alLJF76C2WPSLOo3ve9zWZ+a2m8i1liexn9My2A3hwvbMtZ6HsmTmdmc/LzLHMHKP7OQ9nZeZXGox6kiVe9809s/0qcO/8ZYdNq+5QjYjrgHG6b8F5FLgcGAEurWa5EXj7MB6JRcQvAP8KTAPfrYbfQfd83vXAj9K9guDCzJz/R+PGLJH7qcCfA88FHgfuycxfaSLjYpbI/gG6+f+7GjuQmW9c/4SLWyL7JXSvUvou3f3ljZk5VG/Kt1j2zPx4zzxH6F5lNVRvpbvE6/46uqdkEjgC/E7P38qGUqvKXZK0Mq0/LSNJOpnlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgr0f7gSNL5DCI0pAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Now that everything is running smoothly we have to decide what to do with the column of temperature inside and its NaN's,\n", + "df.temp_inside.hist()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "# In the histogram I noticed that most values are around 21.5ºC so I am going to use this value to fill the NaN's,\n", + "df.temp_inside.fillna(21.5, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Now, as I said I think that the interesting think involving temperature must be the difference\n", + "# between the temperature inside and the temperature outside the cars, so I am creating a new column with the difference,\n", + "df['temp_gradient'] = df.apply(lambda x: x.temp_outside-x.temp_inside, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsideACrainsunsnowtemp_gradient
distance1.000000-0.1289670.5622990.0776360.088175-0.025738-0.0197910.081120-0.0337120.080453
consume-0.1289671.000000-0.227866-0.154953-0.3208110.0965910.248118-0.1706670.072961-0.311697
speed0.562299-0.2278661.0000000.0600390.015411-0.0354080.0094890.0816180.0324810.007143
temp_inside0.077636-0.1549530.0600391.0000000.3574310.300407-0.0351990.2415300.0982300.224108
temp_outside0.088175-0.3208110.0154110.3574311.0000000.167562-0.1863150.346903-0.1623670.990287
AC-0.0257380.096591-0.0354080.3004070.1675621.0000000.2429150.0885980.0659840.130129
rain-0.0197910.2481180.009489-0.035199-0.1863150.2429151.000000-0.1126500.271633-0.189179
sun0.081120-0.1706670.0816180.2415300.3469030.088598-0.1126501.000000-0.0306000.326036
snow-0.0337120.0729610.0324810.098230-0.1623670.0659840.271633-0.0306001.000000-0.184053
temp_gradient0.080453-0.3116970.0071430.2241080.9902870.130129-0.1891790.326036-0.1840531.000000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside \\\n", + "distance 1.000000 -0.128967 0.562299 0.077636 0.088175 \n", + "consume -0.128967 1.000000 -0.227866 -0.154953 -0.320811 \n", + "speed 0.562299 -0.227866 1.000000 0.060039 0.015411 \n", + "temp_inside 0.077636 -0.154953 0.060039 1.000000 0.357431 \n", + "temp_outside 0.088175 -0.320811 0.015411 0.357431 1.000000 \n", + "AC -0.025738 0.096591 -0.035408 0.300407 0.167562 \n", + "rain -0.019791 0.248118 0.009489 -0.035199 -0.186315 \n", + "sun 0.081120 -0.170667 0.081618 0.241530 0.346903 \n", + "snow -0.033712 0.072961 0.032481 0.098230 -0.162367 \n", + "temp_gradient 0.080453 -0.311697 0.007143 0.224108 0.990287 \n", + "\n", + " AC rain sun snow temp_gradient \n", + "distance -0.025738 -0.019791 0.081120 -0.033712 0.080453 \n", + "consume 0.096591 0.248118 -0.170667 0.072961 -0.311697 \n", + "speed -0.035408 0.009489 0.081618 0.032481 0.007143 \n", + "temp_inside 0.300407 -0.035199 0.241530 0.098230 0.224108 \n", + "temp_outside 0.167562 -0.186315 0.346903 -0.162367 0.990287 \n", + "AC 1.000000 0.242915 0.088598 0.065984 0.130129 \n", + "rain 0.242915 1.000000 -0.112650 0.271633 -0.189179 \n", + "sun 0.088598 -0.112650 1.000000 -0.030600 0.326036 \n", + "snow 0.065984 0.271633 -0.030600 1.000000 -0.184053 \n", + "temp_gradient 0.130129 -0.189179 0.326036 -0.184053 1.000000 " + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let's check the correlations between the columns to see if there is any evident correlation between them,\n", + "df.corr()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we can already see some tips to lead us into the study of the dataframe, now we must decide what are our objectives and what we want to show in the study in order to transform the dataframe. As Cobify is a company and companies rely on gainings I am going to focus on the price of each commute. 
I am also going to try to predict the price of a commute before it happens, so that we can offer the client the best possible price and make it more likely that they trust Cobify over other companies. " + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's load this dataframe into a file and transform and play with it in another file,\n", + "df.to_csv('../Data/cleaned_mes.csv')" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "4034f9195f5552b4454ef60198efa491d941068725cfe9b8182a5b0158f58c43" + }, + "kernelspec": { + "display_name": "Python 3.8.12 ('ironhack')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Code/1.Transformation.ipynb b/Code/1.Transformation.ipynb new file mode 100644 index 0000000..5200b9a --- /dev/null +++ b/Code/1.Transformation.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "# All the libraries we are using:\n", + "import pandas as pd\n", + "import numpy as np\n", + "import requests\n", + "from bs4 import BeautifulSoup\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "# let's load the csv file into a dataframe\n", + "df = pd.read_csv('../Data/cleaned_mes.csv', index_col='Unnamed: 0') # To avoid using index_col here I could have used the argument index=False" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsidegas_typeACrainsunsnowtemp_gradient
31311.34.33822.017SP98FalseFalseFalseFalse-5.0
33915.44.14522.024E10FalseFalseFalseFalse2.0
29816.34.55822.016SP98FalseFalseFalseFalse-6.0
7012.35.25521.512SP98FalseFalseFalseFalse-9.5
27924.74.52622.010SP98FalseFalseFalseFalse-12.0
1005.34.13421.59SP98FalseFalseFalseFalse-12.5
18712.34.84122.57E10FalseFalseFalseFalse-15.5
17916.25.22921.00E10FalseFalseFalseFalse-21.0
9011.84.33720.06SP98FalseFalseFalseFalse-14.0
30931.94.33322.016SP98FalseFalseFalseFalse-6.0
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside gas_type AC \\\n", + "313 11.3 4.3 38 22.0 17 SP98 False \n", + "339 15.4 4.1 45 22.0 24 E10 False \n", + "298 16.3 4.5 58 22.0 16 SP98 False \n", + "70 12.3 5.2 55 21.5 12 SP98 False \n", + "279 24.7 4.5 26 22.0 10 SP98 False \n", + "100 5.3 4.1 34 21.5 9 SP98 False \n", + "187 12.3 4.8 41 22.5 7 E10 False \n", + "179 16.2 5.2 29 21.0 0 E10 False \n", + "90 11.8 4.3 37 20.0 6 SP98 False \n", + "309 31.9 4.3 33 22.0 16 SP98 False \n", + "\n", + " rain sun snow temp_gradient \n", + "313 False False False -5.0 \n", + "339 False False False 2.0 \n", + "298 False False False -6.0 \n", + "70 False False False -9.5 \n", + "279 False False False -12.0 \n", + "100 False False False -12.5 \n", + "187 False False False -15.5 \n", + "179 False False False -21.0 \n", + "90 False False False -14.0 \n", + "309 False False False -6.0 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# As we are focusing on the price I am going to create 2 new columns, one with the consume per unit of distance and the other one with a price.\n", + "# I dont have any data of the dates this services were delivered and thus I can not inffer how much did it cost to deliver them, however,\n", + "# I am going to use to prices of this gas types today (17/03/2022): SP98 1.955€/l, E10 1.825€/l\t\n", + "# Source: https://www.dieselogasolina.com/\n", + "df.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "# I have also created a little function to get the price in real time, might be useful for future uses of this code,\n", + "def find_price():\n", + " page = requests.get('https://www.dieselogasolina.com/')\n", + " soup = BeautifulSoup(page.content, 'html.parser')\n", + " table = soup.find('table').find_all('tr')\n", + " p_E10 = table[1].find_all('td')[1].text\n", + " p_SP98 = 
table[2].find_all('td')[1].text\n", + " return float(p_SP98[:5].replace(',','.')), float(p_E10[:5].replace(',','.'))" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's start by creating a function to transform the dataframe:\n", + "def df_trans(dis, con, gt, prices = find_price()): # prices: (price_SP98, price_E10); the default is scraped from https://www.dieselogasolina.com/ only once, when this cell is run\n", + " \"\"\" This function transforms one row of the dataframe into 2 new values: the consume per unit of distance and the price per unit\n", + " of distance; it would also be easy to include the total price (distance * price per distance). It should be used as:\n", + " df[new_columns] = df.apply(lambda x: df_trans(x.distance, x.consume, x.gas_type, prices=[...]), axis=1)\n", + " prices holds the prices of the gas types we are using, as [price_SP98, price_E10]; if omitted, the scraped default is used.\"\"\"\n", + " #print(gt)\n", + " cpd = con/dis # The consume per distance is the first thing we can calculate as it doesn't require any transformation\n", + "\n", + "\n", + " price = dict({'SP98':prices[0], 'E10':prices[1]})\n", + "\n", + " ppd = price[gt]*cpd # We can already know the price per distance\n", + "\n", + " return pd.Series({'con_dis':cpd, 'price_dis':ppd})" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "fp = find_price()\n", + "df['gas_price'] = df.gas_type.apply(lambda x: fp[0] if x == 'SP98' else fp[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "df[['con_dis', 'price_dis']] = df.apply(lambda x:df_trans(x.distance,x.consume,x.gas_type), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "# Now that we have transformed the Dataframe let's save it and work with it in another jupyter,\n", + "df.to_csv('../Data/cleaned_mes.csv', index=False)" +
] + } + ], + "metadata": { + "interpreter": { + "hash": "4034f9195f5552b4454ef60198efa491d941068725cfe9b8182a5b0158f58c43" + }, + "kernelspec": { + "display_name": "Python 3.8.12 ('ironhack')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Code/2.ML.ipynb b/Code/2.ML.ipynb new file mode 100644 index 0000000..d6155e3 --- /dev/null +++ b/Code/2.ML.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# import libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import h2o\n", + "import pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../Data/cleaned_mes.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsidegas_typeACrainsunsnowtemp_gradientgas_pricecon_disprice_dis
028.05.02621.512E10FalseFalseFalseFalse-9.51.8240.1785710.325714
112.04.23021.513E10FalseFalseFalseFalse-8.51.8240.3500000.638400
211.25.53821.515E10FalseFalseFalseFalse-6.51.8240.4910710.895714
312.93.93621.514E10FalseFalseFalseFalse-7.51.8240.3023260.551442
418.54.54621.515E10FalseFalseFalseFalse-6.51.8240.2432430.443676
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside gas_type AC rain \\\n", + "0 28.0 5.0 26 21.5 12 E10 False False \n", + "1 12.0 4.2 30 21.5 13 E10 False False \n", + "2 11.2 5.5 38 21.5 15 E10 False False \n", + "3 12.9 3.9 36 21.5 14 E10 False False \n", + "4 18.5 4.5 46 21.5 15 E10 False False \n", + "\n", + " sun snow temp_gradient gas_price con_dis price_dis \n", + "0 False False -9.5 1.824 0.178571 0.325714 \n", + "1 False False -8.5 1.824 0.350000 0.638400 \n", + "2 False False -6.5 1.824 0.491071 0.895714 \n", + "3 False False -7.5 1.824 0.302326 0.551442 \n", + "4 False False -6.5 1.824 0.243243 0.443676 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n" + ] + } + ], + "source": [ + "# import the prostate data\n", + "datos_h2o = h2o.import_file('../Data/cleaned_mes.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n" + ] + } + ], + "source": [ + "# datos_h2o.describe()\n", + "from h2o.automl import H2OAutoML\n", + "train_as_df = h2o.as_list(datos_h2o,use_pandas=True)\n", + "\n", + "train = h2o.H2OFrame(train_as_df)\n", + "\n", + "x = train.columns\n", + "y = \"price_dis\"\n", + "x.remove(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['distance', 'consume', 'speed', 'temp_inside', 'temp_outside',\n", + " 'gas_type', 'AC', 'rain', 'sun', 'snow', 'temp_gradient', 'con_dis',\n", + " 'price_dis', 
'gas_price'],\n", + " dtype='object')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%\n", + "Model Details\n", + "=============\n", + "H2OStackedEnsembleEstimator : Stacked Ensemble\n", + "Model Key: StackedEnsemble_BestOfFamily_1_AutoML_1_20220317_124457\n", + "\n", + "No model summary for this model\n", + "\n", + "ModelMetricsRegressionGLM: stackedensemble\n", + "** Reported on train data. **\n", + "\n", + "MSE: 0.002027113384374124\n", + "RMSE: 0.0450234759250563\n", + "MAE: 0.01938249020739133\n", + "RMSLE: 0.012270042052598252\n", + "R^2: 0.9990698519911392\n", + "Mean Residual Deviance: 0.002027113384374124\n", + "Null degrees of freedom: 387\n", + "Residual degrees of freedom: 386\n", + "Null deviance: 845.5858483215557\n", + "Residual deviance: 0.7865199931371603\n", + "AIC: -1298.9469789365503\n", + "\n", + "ModelMetricsRegressionGLM: stackedensemble\n", + "** Reported on cross-validation data. **\n", + "\n", + "MSE: 0.003350737858061228\n", + "RMSE: 0.05788555828582141\n", + "MAE: 0.021938003328787183\n", + "RMSLE: 0.012846360808887762\n", + "R^2: 0.9984625023094836\n", + "Mean Residual Deviance: 0.003350737858061228\n", + "Null degrees of freedom: 387\n", + "Residual degrees of freedom: 386\n", + "Null deviance: 847.4103524123551\n", + "Residual deviance: 1.3000862889277565\n", + "AIC: -1103.9506823221718\n" + ] + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can already train some models to then do some predictions and see how it works. 
As I don't have a lot of time to work with the dataframe I am going\n", + "# to use H2O and use the AutoML function, as follows,\n", + "\n", + "aml = H2OAutoML(max_models=10, seed=1)\n", + "aml.train(x=x, y=y, training_frame=train)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "# open the file model_pkl in binary write mode and pickle the AutoML object into it\n", + "with open('model_pkl', 'wb') as files:\n", + " pickle.dump(aml, files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We could use that to make predictions on the prices of the services of cobify and therefore adjust the prices and be better competitors\n", + "# preds = aml.leader.predict(#new data), This would be the way to make predictions on new data" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "4034f9195f5552b4454ef60198efa491d941068725cfe9b8182a5b0158f58c43" + }, + "kernelspec": { + "display_name": "Python 3.8.12 ('ironhack')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Code/3._API.ipynb b/Code/3._API.ipynb new file mode 100644 index 0000000..7c6d5a4 --- /dev/null +++ b/Code/3._API.ipynb @@ -0,0 +1,31 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from flask import Flask, request, Markup\n", + "import fun_4api\n", + "import pymongo\n", + "import pandas as pd\n", + "\n", + "app = Flask(__name__)\n", + "# The idea in this jupyter would be to create an API to interact with the users, where, by entering the distance we want \n", + "# to travel it returns the 
price of the gas, it should scrap the price of the gas and the weather in the place we are at\n", + "\n", + "# @app.route('/')\n", + "# ..." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Code/ETLML.py b/Code/ETLML.py new file mode 100644 index 0000000..42459e5 --- /dev/null +++ b/Code/ETLML.py @@ -0,0 +1,80 @@ +import pandas as pd +import numpy as np +import h2o +from h2o.automl import H2OAutoML +import requests +from bs4 import BeautifulSoup +import pickle + +########################################################################################################################################## +# I don't recomend to read this code, I have also handed in a serie of notebooks with all the code commented and easy to follow +# With this file I aim to condense all the work and make it easier to run the models for future aplications +# I also like to write all the code in one file to be able to run it from the terminal without using any interface, +########################################################################################################################################## +# FUNCTIONS, +########################################################################################################################################## +def find_price(): + page = requests.get('https://www.dieselogasolina.com/') + soup = BeautifulSoup(page.content, 'html.parser') + table = soup.find('table').find_all('tr') + p_E10 = table[1].find_all('td')[1].text + p_SP98 = table[2].find_all('td')[1].text + return float(p_SP98[:5].replace(',','.')), float(p_E10[:5].replace(',','.')) + +def df_trans(dis, con, gt, prices = find_price()): # Where x is a row in our dataframe, datos de https://www.dieselogasolina.com/ + """ This function is aimed to transform the dataframe and create 2 new columns with the consume per unit of distance and the price per unit + of distance, it would 
also be easy to include a column with the total price (distance*price/distance). It should be used as: + df[new_columns] = df.apply(lambda x: df_trans(x.distance, x.consume,x.speed,prices=[...]), axis=1) + It requires an argument with the prices of the gas we are using: [price_SP98, price_E10]""" + #print(gt) + cpd = con/dis # The consume per distance is the first thing we can calculate as it doesn't require any transformation + + + price = dict({'SP98':prices[0], 'E10':prices[1]}) + + ppd = price[gt]*cpd # We can already know the price per distance + + return pd.Series({'con_dis':cpd, 'price_dis':ppd}) +########################################################################################################################################## +# 0. EXPLORATION AND TRANSFORMATION OF THE DATA, +########################################################################################################################################## + +df = pd.read_csv('../Data/measurements.csv') +df['snow'] = df.specials.apply(lambda x: True if type(x) != float and 'snow' in x else False) +cols_2drop = ['specials', 'refill liters','refill gas'] +df.drop(columns=cols_2drop, inplace=True) +df.temp_inside = df.temp_inside.apply(lambda x: float(x.replace(',','.')) if type(x) != float else x) +df.consume = df.consume.apply(lambda x: float(x.replace(',','.'))) +df.distance = df.distance.apply(lambda x: float(x.replace(',','.'))) +df.rain = df.rain.apply(lambda x: bool(x)) +df.sun = df.sun.apply(lambda x: bool(x)) +df.AC = df.AC.apply(lambda x: bool(x)) +df.temp_inside.fillna(21.5, inplace=True) +df['temp_gradient'] = df.apply(lambda x: x.temp_outside-x.temp_inside, axis=1) +fp = find_price() +df['gas_price'] = df.gas_type.apply(lambda x: fp[0] if x == 'SP98' else fp[1]) +df[['con_dis', 'price_dis']] = df.apply(lambda x:df_trans(x.distance,x.consume,x.gas_type), axis=1) + +df.to_csv('../Data/cleaned_mes.csv', index=False) + 
+########################################################################################################################################## +# 1. GENERATION OF THE MODEL, +########################################################################################################################################## +h2o.init() + +datos_h2o = h2o.H2OFrame(df) + +# the frame is converted back to pandas and then into a new H2OFrame, which is the one used for training +train_as_df = h2o.as_list(datos_h2o,use_pandas=True) + +train = h2o.H2OFrame(train_as_df) + +# predictors x are every column except the target y +# NOTE(review): x still contains 'con_dis' and 'gas_price' while df_trans builds price_dis as gas price * con_dis, so the target can be rebuilt from two predictors -- confirm this is intended +x = train.columns +y = "price_dis" +x.remove(y) + +aml = H2OAutoML(max_models=10, seed=1) +aml.train(x=x, y=y, training_frame=train) +# NOTE(review): this pickles the H2OAutoML object itself; presumably h2o.save_model(aml.leader, ...) is the supported way to persist the model -- verify +with open('model_pkl', 'wb') as files: + pickle.dump(aml, files) + +print(aml.leader) \ No newline at end of file diff --git a/Code/model_pkl b/Code/model_pkl new file mode 100644 index 0000000..d015eb8 Binary files /dev/null and b/Code/model_pkl differ diff --git a/Data/cleaned_mes.csv b/Data/cleaned_mes.csv new file mode 100644 index 0000000..a01fb87 --- /dev/null +++ b/Data/cleaned_mes.csv @@ -0,0 +1,389 @@ +distance,consume,speed,temp_inside,temp_outside,gas_type,AC,rain,sun,snow,temp_gradient,gas_price,con_dis,price_dis +28.0,5.0,26,21.5,12,E10,False,False,False,False,-9.5,1.823,0.17857142857142858,0.32553571428571426 +12.0,4.2,30,21.5,13,E10,False,False,False,False,-8.5,1.823,0.35000000000000003,0.63805 +11.2,5.5,38,21.5,15,E10,False,False,False,False,-6.5,1.823,0.4910714285714286,0.8952232142857143 +12.9,3.9,36,21.5,14,E10,False,False,False,False,-7.5,1.823,0.3023255813953488,0.5511395348837209 +18.5,4.5,46,21.5,15,E10,False,False,False,False,-6.5,1.823,0.24324324324324326,0.44343243243243247 +8.3,6.4,50,21.5,10,E10,False,False,False,False,-11.5,1.823,0.7710843373493975,1.4056867469879517 +7.8,4.4,43,21.5,11,E10,False,False,False,False,-10.5,1.823,0.5641025641025642,1.0283589743589745 +12.3,5.0,40,21.5,6,E10,False,False,False,False,-15.5,1.823,0.4065040650406504,0.7410569105691056 +4.9,6.4,26,21.5,4,E10,False,False,False,False,-17.5,1.823,1.3061224489795917,2.3810612244897955 
+11.9,5.3,30,21.5,9,E10,False,False,False,False,-12.5,1.823,0.4453781512605042,0.811924369747899 +12.4,5.6,42,21.5,4,E10,False,False,False,False,-17.5,1.823,0.4516129032258064,0.823290322580645 +11.8,4.6,38,21.5,0,E10,False,False,False,False,-21.5,1.823,0.38983050847457623,0.7106610169491524 +12.3,5.9,59,21.5,10,E10,False,False,False,False,-11.5,1.823,0.4796747967479675,0.8744471544715446 +24.7,5.1,58,21.5,12,E10,False,False,False,False,-9.5,1.823,0.20647773279352225,0.37640890688259104 +12.4,4.7,46,21.5,11,E10,False,False,False,False,-10.5,1.823,0.3790322580645161,0.6909758064516128 +17.3,5.1,24,21.5,5,E10,False,False,False,False,-16.5,1.823,0.29479768786127164,0.5374161849710982 +33.4,5.6,36,21.5,3,E10,False,False,False,False,-18.5,1.823,0.16766467065868262,0.30565269461077843 +11.8,5.1,32,21.5,3,E10,False,False,False,False,-18.5,1.823,0.43220338983050843,0.7879067796610169 +25.9,4.9,39,21.5,8,E10,False,False,False,False,-13.5,1.823,0.1891891891891892,0.3448918918918919 +11.8,4.7,40,21.5,4,E10,False,False,False,False,-17.5,1.823,0.3983050847457627,0.7261101694915254 +25.3,5.5,32,21.5,3,E10,False,False,False,False,-18.5,1.823,0.21739130434782608,0.3963043478260869 +14.2,5.9,38,21.5,1,E10,False,False,False,False,-20.5,1.823,0.41549295774647893,0.7574436619718311 +17.9,5.7,37,21.5,1,E10,False,False,False,False,-20.5,1.823,0.3184357541899442,0.5805083798882682 +11.8,4.7,36,21.5,1,E10,False,False,False,False,-20.5,1.823,0.3983050847457627,0.7261101694915254 +12.3,5.9,62,21.5,6,E10,False,False,False,False,-15.5,1.823,0.4796747967479675,0.8744471544715446 +12.4,4.1,57,21.5,9,E10,False,False,False,False,-12.5,1.823,0.33064516129032256,0.602766129032258 +18.4,5.7,21,22.5,2,E10,False,False,False,False,-20.5,1.823,0.3097826086956522,0.564733695652174 +18.4,5.8,28,21.5,3,E10,False,False,False,False,-18.5,1.823,0.31521739130434784,0.5746413043478261 +18.3,5.5,29,21.5,1,E10,False,False,False,False,-20.5,1.823,0.3005464480874317,0.5478961748633879 
+18.4,5.7,35,21.5,4,E10,False,False,False,False,-17.5,1.823,0.3097826086956522,0.564733695652174 +12.3,5.3,51,21.5,11,E10,False,False,False,False,-10.5,1.823,0.4308943089430894,0.7855203252032519 +11.8,5.0,29,21.5,10,E10,False,False,False,False,-11.5,1.823,0.423728813559322,0.772457627118644 +12.3,5.6,58,21.5,8,E10,False,False,False,False,-13.5,1.823,0.4552845528455284,0.8299837398373983 +32.6,4.8,40,21.5,7,E10,False,False,False,False,-14.5,1.823,0.14723926380368096,0.2684171779141104 +19.0,4.3,36,21.5,7,E10,False,False,False,False,-14.5,1.823,0.2263157894736842,0.41257368421052626 +12.1,5.7,36,21.5,8,E10,False,False,False,False,-13.5,1.823,0.4710743801652893,0.8587685950413224 +20.0,5.2,37,21.5,4,E10,False,False,False,False,-17.5,1.823,0.26,0.47398 +4.9,7.4,26,21.5,6,E10,False,False,False,False,-15.5,1.823,1.510204081632653,2.7531020408163265 +11.8,4.8,23,21.5,9,E10,False,False,False,False,-12.5,1.823,0.4067796610169491,0.7415593220338982 +12.3,6.5,58,21.5,7,E10,False,False,False,False,-14.5,1.823,0.5284552845528455,0.9633739837398374 +12.4,4.0,55,21.5,7,E10,True,True,False,False,-14.5,1.823,0.3225806451612903,0.5880645161290322 +4.5,5.0,29,21.5,7,E10,True,False,False,False,-14.5,1.823,1.1111111111111112,2.0255555555555556 +11.7,4.6,37,21.5,6,E10,False,False,False,False,-15.5,1.823,0.39316239316239315,0.7167350427350427 +10.2,4.3,44,21.5,7,E10,False,False,False,False,-14.5,1.823,0.4215686274509804,0.7685196078431372 +5.4,3.3,32,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.611111111111111,1.1935 +2.0,9.9,21,21.5,4,SP98,False,False,False,False,-17.5,1.953,4.95,9.66735 +14.2,5.1,32,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.35915492957746475,0.7014295774647887 +16.0,4.9,25,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.30625,0.5981062500000001 +11.8,4.9,25,21.5,11,SP98,False,False,False,False,-10.5,1.953,0.4152542372881356,0.8109915254237289 +27.3,4.7,36,21.5,10,SP98,False,False,False,False,-11.5,1.953,0.17216117216117216,0.3362307692307692 
+11.8,5.1,29,21.5,5,SP98,False,True,False,False,-16.5,1.953,0.43220338983050843,0.844093220338983 +10.6,4.9,63,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.4622641509433963,0.902801886792453 +11.6,4.2,61,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.3620689655172414,0.7071206896551725 +13.1,6.1,46,21.5,6,SP98,False,True,False,False,-15.5,1.953,0.46564885496183206,0.909412213740458 +6.1,6.4,28,21.5,8,SP98,False,False,False,False,-13.5,1.953,1.0491803278688525,2.049049180327869 +153.5,4.9,82,21.5,3,SP98,False,True,False,False,-18.5,1.953,0.03192182410423453,0.06234332247557004 +11.8,4.9,24,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.4152542372881356,0.8109915254237289 +12.3,6.2,61,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.5040650406504065,0.9844390243902439 +12.4,4.2,57,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.33870967741935487,0.6615000000000001 +12.3,5.7,52,21.5,8,SP98,False,True,False,False,-13.5,1.953,0.46341463414634143,0.9050487804878048 +2.9,7.9,18,21.5,9,SP98,True,True,False,False,-12.5,1.953,2.724137931034483,5.320241379310346 +2.1,12.2,23,21.5,9,SP98,True,True,False,False,-12.5,1.953,5.809523809523809,11.346 +9.8,5.8,24,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.5918367346938775,1.155857142857143 +6.8,5.3,26,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.7794117647058824,1.5221911764705882 +12.4,5.4,24,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.43548387096774194,0.8505 +12.4,4.7,57,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.3790322580645161,0.7402500000000001 +11.8,4.6,41,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.38983050847457623,0.7613389830508475 +12.3,5.4,55,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.43902439024390244,0.8574146341463414 +14.0,4.3,40,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.3071428571428571,0.59985 +11.8,4.5,37,21.5,4,SP98,True,False,False,False,-17.5,1.953,0.3813559322033898,0.7447881355932203 
+12.3,5.2,55,21.5,12,SP98,False,False,False,False,-9.5,1.953,0.4227642276422764,0.8256585365853658 +2.0,6.2,20,21.5,10,SP98,False,False,False,False,-11.5,1.953,3.1,6.0543000000000005 +13.9,5.1,29,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.3669064748201438,0.7165683453237409 +9.7,3.9,42,21.5,8,SP98,False,False,False,False,-13.5,1.953,0.4020618556701031,0.7852268041237114 +11.6,5.0,25,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.4310344827586207,0.8418103448275863 +14.2,5.4,38,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.3802816901408451,0.7426901408450706 +11.8,4.5,39,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.3813559322033898,0.7447881355932203 +24.8,5.1,50,21.5,9,SP98,False,False,False,False,-12.5,1.953,0.20564516129032256,0.401625 +12.4,4.7,56,21.5,7,SP98,False,False,False,False,-14.5,1.953,0.3790322580645161,0.7402500000000001 +34.8,4.0,28,20.0,4,SP98,False,False,False,False,-16.0,1.953,0.1149425287356322,0.2244827586206897 +14.2,5.4,36,20.0,6,SP98,False,False,False,False,-14.0,1.953,0.3802816901408451,0.7426901408450706 +5.2,4.5,39,20.0,10,SP98,False,False,False,False,-10.0,1.953,0.8653846153846153,1.6900961538461539 +10.5,3.6,42,20.0,10,SP98,False,False,False,False,-10.0,1.953,0.34285714285714286,0.6696000000000001 +12.3,5.2,57,20.0,10,SP98,False,False,False,False,-10.0,1.953,0.4227642276422764,0.8256585365853658 +11.8,4.9,25,20.0,11,SP98,False,False,False,False,-9.0,1.953,0.4152542372881356,0.8109915254237289 +12.3,6.2,58,20.0,11,SP98,False,True,False,False,-9.0,1.953,0.5040650406504065,0.9844390243902439 +13.2,4.3,51,20.0,11,SP98,False,True,False,False,-9.0,1.953,0.32575757575757575,0.6362045454545454 +13.0,5.0,45,20.0,11,SP98,False,True,False,False,-9.0,1.953,0.38461538461538464,0.7511538461538462 +12.9,5.1,32,20.0,11,SP98,False,False,False,False,-9.0,1.953,0.3953488372093023,0.7721162790697674 +13.9,5.6,22,20.0,8,SP98,False,False,False,False,-12.0,1.953,0.40287769784172656,0.786820143884892 
+11.8,4.3,37,20.0,6,SP98,False,False,False,False,-14.0,1.953,0.3644067796610169,0.711686440677966 +12.2,5.8,60,20.0,11,SP98,False,False,False,False,-9.0,1.953,0.4754098360655738,0.9284754098360657 +12.5,4.0,51,20.0,13,SP98,False,False,False,False,-7.0,1.953,0.32,0.6249600000000001 +12.4,4.7,43,21.5,10,SP98,False,False,False,False,-11.5,1.953,0.3790322580645161,0.7402500000000001 +11.8,5.9,21,20.0,9,SP98,True,True,False,False,-11.0,1.953,0.5,0.9765 +11.8,5.3,52,21.5,11,SP98,False,False,False,False,-10.5,1.953,0.4491525423728813,0.8771949152542372 +12.5,4.2,57,20.0,11,SP98,False,False,False,False,-9.0,1.953,0.336,0.656208 +15.7,5.3,33,21.5,9,SP98,False,False,False,False,-12.5,1.953,0.3375796178343949,0.6592929936305733 +12.9,5.7,35,21.5,9,SP98,False,False,False,False,-12.5,1.953,0.4418604651162791,0.8629534883720931 +6.4,4.4,37,21.5,10,SP98,False,False,False,False,-11.5,1.953,0.6875,1.3426875 +5.3,4.1,34,21.5,9,SP98,False,False,False,False,-12.5,1.953,0.7735849056603773,1.510811320754717 +26.2,5.8,71,21.0,8,SP98,True,True,False,False,-13.0,1.953,0.22137404580152673,0.4323435114503817 +18.8,5.0,62,21.5,9,SP98,False,True,False,False,-12.5,1.953,0.26595744680851063,0.5194148936170213 +4.9,6.9,25,21.0,12,SP98,False,True,False,False,-9.0,1.953,1.4081632653061225,2.7501428571428574 +12.4,5.4,18,21.0,11,SP98,True,True,False,False,-10.0,1.953,0.43548387096774194,0.8505 +22.9,5.3,45,20.5,7,SP98,False,False,False,False,-13.5,1.953,0.2314410480349345,0.4520043668122271 +162.7,5.5,75,23.0,1,SP98,False,False,False,False,-22.0,1.953,0.03380454824830977,0.06602028272894898 +4.9,6.5,26,21.0,1,SP98,False,False,False,False,-20.0,1.953,1.3265306122448979,2.5907142857142857 +11.8,4.7,36,21.0,4,SP98,False,False,False,False,-17.0,1.953,0.3983050847457627,0.7778898305084746 +16.6,5.1,56,21.0,7,SP98,False,False,False,False,-14.0,1.953,0.30722891566265054,0.6000180722891565 +12.4,5.7,37,21.0,7,SP98,False,False,False,False,-14.0,1.953,0.4596774193548387,0.89775 
+15.9,5.4,25,21.0,7,SP98,False,False,False,False,-14.0,1.953,0.339622641509434,0.6632830188679246 +5.1,8.7,21,23.5,5,SP98,False,False,False,False,-18.5,1.953,1.7058823529411764,3.3315882352941175 +22.4,4.9,66,20.5,7,SP98,False,False,False,False,-13.5,1.953,0.21875000000000003,0.4272187500000001 +31.1,4.7,42,20.5,7,SP98,False,False,False,False,-13.5,1.953,0.15112540192926044,0.2951479099678456 +4.9,6.3,27,20.5,3,SP98,False,False,False,False,-17.5,1.953,1.2857142857142856,2.5109999999999997 +11.8,5.1,26,23.0,4,SP98,False,True,False,False,-19.0,1.953,0.43220338983050843,0.844093220338983 +22.9,6.0,42,23.0,4,SP98,False,True,False,False,-19.0,1.953,0.26200873362445415,0.511703056768559 +12.4,4.6,38,23.0,1,SP98,False,True,False,True,-22.0,1.953,0.3709677419354838,0.7244999999999999 +12.9,5.8,40,23.0,4,SP98,False,False,False,False,-19.0,1.953,0.44961240310077516,0.878093023255814 +11.8,5.1,43,23.0,0,SP98,False,False,False,False,-23.0,1.953,0.43220338983050843,0.844093220338983 +12.2,5.8,58,23.0,2,SP98,False,False,False,False,-21.0,1.953,0.4754098360655738,0.9284754098360657 +24.8,4.6,55,23.0,3,SP98,False,False,False,False,-20.0,1.953,0.1854838709677419,0.36224999999999996 +14.2,5.6,24,23.0,8,SP98,False,False,False,False,-15.0,1.953,0.39436619718309857,0.7701971830985915 +11.8,4.6,38,23.0,0,SP98,False,True,False,True,-23.0,1.953,0.38983050847457623,0.7613389830508475 +12.2,6.3,57,23.0,0,SP98,False,True,False,True,-23.0,1.953,0.5163934426229508,1.0085163934426231 +24.7,5.5,56,25.0,1,SP98,False,False,False,False,-24.0,1.953,0.22267206477732795,0.43487854251012154 +6.8,4.3,46,24.0,2,SP98,False,False,False,False,-22.0,1.953,0.6323529411764706,1.2349852941176471 +17.3,5.6,37,21.5,1,SP98,False,False,False,False,-20.5,1.953,0.32369942196531787,0.6321849710982658 +11.8,4.3,44,21.5,-3,SP98,False,False,False,False,-24.5,1.953,0.3644067796610169,0.711686440677966 +15.9,5.7,46,21.5,5,SP98,False,False,False,False,-16.5,1.953,0.3584905660377358,0.7001320754716981 
+5.1,6.4,39,21.5,4,SP98,False,False,False,False,-17.5,1.953,1.254901960784314,2.4508235294117653 +16.1,4.5,33,21.5,6,SP98,False,False,False,False,-15.5,1.953,0.2795031055900621,0.5458695652173913 +11.8,4.5,43,21.5,3,SP98,False,False,False,False,-18.5,1.953,0.3813559322033898,0.7447881355932203 +4.2,6.0,26,21.5,5,SP98,False,False,False,False,-16.5,1.953,1.4285714285714286,2.79 +17.4,5.1,30,21.5,5,SP98,False,False,False,False,-16.5,1.953,0.2931034482758621,0.5724310344827587 +23.5,6.0,25,21.5,5,SP98,False,True,False,False,-16.5,1.953,0.2553191489361702,0.49863829787234043 +11.8,4.5,38,21.5,5,SP98,False,True,False,False,-16.5,1.953,0.3813559322033898,0.7447881355932203 +12.3,6.1,61,21.5,10,SP98,False,True,False,False,-11.5,1.953,0.4959349593495934,0.968560975609756 +16.1,5.4,24,21.5,7,E10,False,True,False,False,-14.5,1.823,0.3354037267080745,0.6114409937888198 +11.8,4.3,40,21.5,10,E10,False,True,False,False,-11.5,1.823,0.3644067796610169,0.6643135593220338 +12.3,5.4,58,21.5,13,E10,False,False,False,False,-8.5,1.823,0.43902439024390244,0.8003414634146341 +12.4,4.3,49,21.5,17,E10,False,False,False,False,-4.5,1.823,0.34677419354838707,0.6321693548387096 +7.0,5.2,25,21.5,17,E10,False,False,False,False,-4.5,1.823,0.7428571428571429,1.3542285714285713 +11.8,4.1,37,21.5,10,E10,False,False,False,False,-11.5,1.823,0.34745762711864403,0.6334152542372881 +20.1,4.4,41,21.5,18,E10,False,False,False,False,-3.5,1.823,0.21890547263681592,0.3990646766169154 +20.8,4.5,45,21.5,10,E10,False,False,False,False,-11.5,1.823,0.21634615384615383,0.3943990384615384 +1.7,10.8,14,21.5,10,E10,False,True,False,False,-11.5,1.823,6.352941176470589,11.581411764705884 +35.9,4.7,45,21.5,12,E10,False,False,False,False,-9.5,1.823,0.13091922005571033,0.23866573816155992 +36.9,4.8,52,21.5,5,E10,False,False,False,False,-16.5,1.823,0.13008130081300814,0.23713821138211383 +16.8,4.0,46,21.5,8,E10,False,False,False,False,-13.5,1.823,0.23809523809523808,0.434047619047619 
+9.9,5.0,28,21.5,9,E10,False,False,False,False,-12.5,1.823,0.5050505050505051,0.9207070707070707 +22.9,4.6,61,21.5,7,E10,False,True,False,False,-14.5,1.823,0.20087336244541484,0.36619213973799125 +17.3,5.0,61,21.5,6,E10,True,True,False,False,-15.5,1.823,0.2890173410404624,0.526878612716763 +11.8,4.3,37,21.5,7,E10,False,False,False,False,-14.5,1.823,0.3644067796610169,0.6643135593220338 +36.6,5.2,80,21.5,7,E10,False,True,False,False,-14.5,1.823,0.14207650273224043,0.2590054644808743 +44.9,4.7,62,21.5,8,E10,False,False,False,False,-13.5,1.823,0.1046770601336303,0.19082628062360804 +11.8,4.2,34,21.5,9,E10,False,True,False,False,-12.5,1.823,0.3559322033898305,0.648864406779661 +21.6,5.3,44,21.5,9,E10,False,True,False,False,-12.5,1.823,0.24537037037037035,0.4473101851851851 +39.4,5.3,60,21.5,9,E10,False,True,False,False,-12.5,1.823,0.13451776649746192,0.24522588832487308 +5.1,8.1,39,21.5,4,E10,False,False,False,False,-17.5,1.823,1.5882352941176472,2.895352941176471 +26.6,4.8,38,21.5,7,E10,False,False,False,False,-14.5,1.823,0.18045112781954886,0.32896240601503757 +53.2,5.1,71,21.5,2,E10,False,False,False,False,-19.5,1.823,0.09586466165413533,0.1747612781954887 +18.9,4.4,53,21.5,2,E10,False,False,False,False,-19.5,1.823,0.23280423280423285,0.42440211640211645 +43.5,5.0,80,21.5,3,E10,False,False,False,False,-18.5,1.823,0.11494252873563218,0.20954022988505747 +6.1,6.3,26,21.5,5,E10,False,False,False,False,-16.5,1.823,1.0327868852459017,1.8827704918032786 +16.4,4.8,49,21.5,5,E10,False,False,False,False,-16.5,1.823,0.29268292682926833,0.5335609756097561 +12.3,6.1,40,21.5,6,E10,False,False,False,False,-15.5,1.823,0.4959349593495934,0.9040894308943088 +21.1,4.6,36,21.5,8,E10,False,False,False,False,-13.5,1.823,0.21800947867298576,0.397431279620853 +21.1,4.8,43,21.5,7,E10,False,False,False,False,-14.5,1.823,0.22748815165876776,0.4147109004739336 +22.7,4.7,55,21.5,6,E10,False,False,False,False,-15.5,1.823,0.20704845814977976,0.3774493392070485 
+44.4,4.8,38,21.5,8,E10,False,False,False,False,-13.5,1.823,0.10810810810810811,0.19708108108108108 +35.8,4.4,51,21.5,6,E10,False,False,False,False,-15.5,1.823,0.12290502793296092,0.22405586592178775 +11.8,4.9,44,21.5,0,E10,False,False,False,False,-21.5,1.823,0.4152542372881356,0.7570084745762712 +26.2,4.9,42,21.5,6,E10,False,False,False,False,-15.5,1.823,0.18702290076335878,0.3409427480916031 +40.6,4.4,44,21.0,3,E10,False,False,False,False,-18.0,1.823,0.10837438423645321,0.19756650246305418 +12.4,5.3,38,21.0,-5,E10,False,False,False,False,-26.0,1.823,0.42741935483870963,0.7791854838709676 +14.1,5.3,47,21.0,-3,E10,False,False,False,False,-24.0,1.823,0.375886524822695,0.6852411347517731 +58.7,4.8,75,21.0,0,E10,False,False,False,False,-21.0,1.823,0.0817717206132879,0.14906984667802384 +16.2,5.2,29,21.0,0,E10,False,False,False,False,-21.0,1.823,0.3209876543209877,0.5851604938271605 +12.3,4.9,50,22.5,0,E10,False,False,False,False,-22.5,1.823,0.3983739837398374,0.7262357723577235 +12.3,7.1,52,22.5,0,E10,True,True,False,True,-22.5,1.823,0.5772357723577235,1.05230081300813 +12.4,5.2,51,22.5,1,E10,False,False,False,False,-21.5,1.823,0.41935483870967744,0.764483870967742 +31.8,4.7,59,22.5,3,E10,False,False,False,False,-19.5,1.823,0.14779874213836477,0.26943710691823897 +12.3,5.1,55,22.5,8,E10,False,False,False,False,-14.5,1.823,0.41463414634146334,0.7558780487804877 +51.6,5.0,73,22.5,12,E10,False,False,False,False,-10.5,1.823,0.09689922480620154,0.1766472868217054 +38.6,4.6,44,22.5,10,E10,False,False,False,False,-12.5,1.823,0.11917098445595854,0.21724870466321242 +12.3,4.8,41,22.5,7,E10,False,False,False,False,-15.5,1.823,0.39024390243902435,0.7114146341463413 +81.2,4.4,69,22.0,13,E10,False,False,False,False,-9.0,1.823,0.054187192118226604,0.09878325123152709 +130.3,4.6,85,22.0,12,E10,False,False,False,False,-10.0,1.823,0.03530314658480429,0.06435763622409822 +67.2,4.3,67,22.0,18,E10,False,False,False,False,-4.0,1.823,0.06398809523809523,0.1166502976190476 
+43.7,4.7,44,22.0,9,SP98,False,True,False,False,-13.0,1.953,0.10755148741418764,0.21004805491990847 +12.1,4.2,43,22.0,4,SP98,False,False,False,False,-18.0,1.953,0.34710743801652894,0.677900826446281 +56.1,4.8,82,22.0,13,SP98,False,False,False,False,-9.0,1.953,0.0855614973262032,0.16710160427807486 +39.0,4.1,61,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.10512820512820512,0.2053153846153846 +11.8,4.5,41,21.5,13,SP98,False,False,False,False,-8.5,1.953,0.3813559322033898,0.7447881355932203 +38.5,4.8,63,21.5,14,SP98,False,False,False,False,-7.5,1.953,0.12467532467532468,0.2434909090909091 +28.2,4.6,54,21.5,14,SP98,False,False,False,False,-7.5,1.953,0.1631205673758865,0.31857446808510637 +2.9,7.4,24,21.5,14,SP98,False,False,False,False,-7.5,1.953,2.5517241379310347,4.983517241379311 +6.1,5.6,24,21.5,13,SP98,False,False,False,False,-8.5,1.953,0.9180327868852459,1.7929180327868854 +19.6,4.9,43,21.5,13,SP98,False,False,False,False,-8.5,1.953,0.25,0.48825 +22.2,3.8,42,21.5,15,SP98,False,False,False,False,-6.5,1.953,0.17117117117117117,0.3342972972972973 +13.6,4.5,44,19.0,18,SP98,False,False,False,False,-1.0,1.953,0.33088235294117646,0.6462132352941177 +12.6,4.1,33,21.5,17,SP98,False,False,False,False,-4.5,1.953,0.32539682539682535,0.6355 +8.7,5.3,28,22.5,12,SP98,True,True,False,False,-10.5,1.953,0.6091954022988506,1.1897586206896553 +7.9,4.7,31,22.5,12,SP98,True,False,False,False,-10.5,1.953,0.5949367088607594,1.1619113924050632 +2.4,9.0,26,20.0,10,SP98,False,False,False,False,-10.0,1.953,3.75,7.32375 +4.9,6.3,26,20.0,10,SP98,False,False,False,False,-10.0,1.953,1.2857142857142856,2.5109999999999997 +18.1,3.6,36,20.0,19,SP98,False,False,False,False,-1.0,1.953,0.19889502762430938,0.38844198895027626 +25.9,3.7,39,20.0,21,SP98,False,False,False,False,1.0,1.953,0.14285714285714288,0.279 +1.3,11.5,21,20.0,10,SP98,False,False,False,False,-10.0,1.953,8.846153846153847,17.276538461538465 
+14.1,5.0,22,20.0,12,SP98,False,False,False,False,-8.0,1.953,0.3546099290780142,0.6925531914893618 +13.4,5.5,31,20.0,9,SP98,False,False,False,False,-11.0,1.953,0.4104477611940298,0.8016044776119402 +6.4,4.7,33,20.0,8,SP98,False,False,False,False,-12.0,1.953,0.734375,1.434234375 +12.9,4.5,42,20.0,13,SP98,False,False,False,False,-7.0,1.953,0.3488372093023256,0.6812790697674419 +12.1,4.4,33,22.5,5,SP98,False,False,False,False,-17.5,1.953,0.3636363636363637,0.7101818181818184 +15.7,4.1,32,22.5,13,SP98,False,False,False,False,-9.5,1.953,0.2611464968152866,0.5100191082802548 +16.2,4.4,26,22.5,11,SP98,False,False,False,False,-11.5,1.953,0.271604938271605,0.5304444444444445 +12.8,4.6,22,22.5,12,SP98,False,False,False,False,-10.5,1.953,0.35937499999999994,0.701859375 +19.0,4.4,58,22.5,17,SP98,False,False,True,False,-5.5,1.953,0.23157894736842108,0.4522736842105264 +29.0,4.0,27,22.5,12,SP98,False,False,False,False,-10.5,1.953,0.13793103448275862,0.2693793103448276 +12.1,5.0,32,22.5,9,SP98,False,False,False,False,-13.5,1.953,0.4132231404958678,0.8070247933884298 +12.3,5.2,55,22.5,10,SP98,False,False,False,False,-12.5,1.953,0.4227642276422764,0.8256585365853658 +24.8,4.0,56,22.5,11,SP98,False,False,False,False,-11.5,1.953,0.16129032258064516,0.315 +12.9,5.1,34,22.5,8,SP98,False,True,False,False,-14.5,1.953,0.3953488372093023,0.7721162790697674 +11.8,4.5,39,22.5,3,SP98,False,False,False,False,-19.5,1.953,0.3813559322033898,0.7447881355932203 +31.4,4.6,62,22.5,11,SP98,False,False,False,False,-11.5,1.953,0.1464968152866242,0.2861082802547771 +19.0,5.1,53,22.5,4,SP98,False,True,False,False,-18.5,1.953,0.26842105263157895,0.5242263157894737 +13.0,5.7,38,22.5,3,SP98,True,True,False,False,-19.5,1.953,0.43846153846153846,0.8563153846153846 +11.8,4.8,42,22.5,2,SP98,False,False,False,False,-20.5,1.953,0.4067796610169491,0.7944406779661016 +13.0,6.2,32,22.5,4,SP98,False,False,False,False,-18.5,1.953,0.47692307692307695,0.9314307692307693 
+11.8,5.0,43,22.5,1,SP98,False,False,False,False,-21.5,1.953,0.423728813559322,0.8275423728813559 +27.1,5.0,69,22.5,8,SP98,False,False,False,False,-14.5,1.953,0.18450184501845018,0.3603321033210332 +5.2,4.6,38,22.5,8,SP98,False,False,False,False,-14.5,1.953,0.8846153846153845,1.727653846153846 +19.0,4.5,29,22.5,10,E10,False,False,False,False,-12.5,1.823,0.23684210526315788,0.4317631578947368 +12.4,4.8,38,22.5,1,E10,False,False,False,False,-21.5,1.823,0.3870967741935484,0.7056774193548386 +25.2,5.0,55,22.5,9,E10,False,False,False,False,-13.5,1.823,0.19841269841269843,0.36170634920634925 +14.3,4.8,36,22.5,10,E10,False,False,False,False,-12.5,1.823,0.3356643356643356,0.6119160839160839 +11.8,4.6,40,22.5,2,E10,False,False,False,False,-20.5,1.823,0.38983050847457623,0.7106610169491524 +16.9,4.5,48,22.5,9,E10,False,False,True,False,-13.5,1.823,0.26627218934911245,0.485414201183432 +12.4,4.6,55,22.5,11,E10,False,False,True,False,-11.5,1.823,0.3709677419354838,0.676274193548387 +17.4,4.4,36,22.5,12,E10,False,False,True,False,-10.5,1.823,0.25287356321839083,0.46098850574712646 +9.2,5.7,33,22.5,8,E10,False,True,False,False,-14.5,1.823,0.6195652173913044,1.129467391304348 +12.3,5.8,54,22.5,10,E10,False,True,False,False,-12.5,1.823,0.47154471544715443,0.8596260162601626 +13.0,5.9,32,22.5,10,E10,False,False,False,False,-12.5,1.823,0.4538461538461539,0.8273615384615386 +11.8,6.1,16,22.5,6,E10,False,True,False,False,-16.5,1.823,0.5169491525423728,0.9423983050847456 +13.0,5.7,37,22.5,11,E10,False,True,False,False,-11.5,1.823,0.43846153846153846,0.7993153846153845 +12.3,5.0,42,22.5,10,E10,False,False,False,False,-12.5,1.823,0.4065040650406504,0.7410569105691056 +12.3,5.2,57,22.5,15,E10,False,False,True,False,-7.5,1.823,0.4227642276422764,0.7706991869918698 +12.5,4.3,57,22.5,16,E10,False,False,True,False,-6.5,1.823,0.344,0.6271119999999999 +31.5,4.1,30,22.5,16,E10,False,False,True,False,-6.5,1.823,0.13015873015873014,0.23727936507936503 
+11.8,4.4,42,22.5,8,E10,False,False,False,False,-14.5,1.823,0.3728813559322034,0.6797627118644067 +24.9,4.5,53,22.5,14,E10,False,False,False,False,-8.5,1.823,0.18072289156626506,0.3294578313253012 +17.0,3.9,46,22.5,14,E10,False,False,True,False,-8.5,1.823,0.22941176470588234,0.4182176470588235 +2.0,8.1,20,22.5,14,E10,False,False,False,False,-8.5,1.823,4.05,7.38315 +11.8,4.4,33,22.5,8,E10,False,False,False,False,-14.5,1.823,0.3728813559322034,0.6797627118644067 +7.4,5.0,31,22.5,12,E10,False,False,False,False,-10.5,1.823,0.6756756756756757,1.2317567567567567 +12.4,4.7,55,22.5,14,E10,False,False,True,False,-8.5,1.823,0.3790322580645161,0.6909758064516128 +2.0,6.0,22,22.5,14,E10,False,False,False,False,-8.5,1.823,3.0,5.468999999999999 +14.0,5.0,41,22.5,8,E10,False,False,False,False,-14.5,1.823,0.35714285714285715,0.6510714285714285 +25.7,5.0,45,22.5,7,E10,False,False,False,False,-15.5,1.823,0.19455252918287938,0.3546692607003891 +24.5,3.9,50,21.5,15,E10,False,False,True,False,-6.5,1.823,0.15918367346938775,0.29019183673469384 +11.8,4.5,28,22.5,12,E10,False,False,False,False,-10.5,1.823,0.3813559322033898,0.6952118644067796 +4.1,5.4,24,22.5,13,E10,False,False,False,False,-9.5,1.823,1.3170731707317076,2.401024390243903 +4.2,5.6,29,22.0,17,E10,False,False,False,False,-5.0,1.823,1.3333333333333333,2.4306666666666663 +4.2,3.9,29,22.0,18,E10,False,False,True,False,-4.0,1.823,0.9285714285714285,1.6927857142857141 +16.0,4.0,40,22.0,10,E10,False,False,False,False,-12.0,1.823,0.25,0.45575 +22.9,4.0,29,21.5,21,E10,False,False,False,False,-0.5,1.823,0.17467248908296945,0.3184279475982533 +16.0,3.8,42,21.5,8,E10,False,False,False,False,-13.5,1.823,0.2375,0.4329625 +15.4,4.5,50,22.0,14,E10,False,False,False,False,-8.0,1.823,0.2922077922077922,0.5326948051948052 +16.0,3.8,41,22.0,12,E10,False,False,False,False,-10.0,1.823,0.2375,0.4329625 +4.2,5.6,32,22.0,18,E10,False,False,False,False,-4.0,1.823,1.3333333333333333,2.4306666666666663 
+101.9,5.2,75,22.0,18,E10,False,False,False,False,-4.0,1.823,0.05103042198233562,0.09302845927379784 +93.9,4.8,88,23.0,18,E10,True,False,True,False,-5.0,1.823,0.051118210862619806,0.0931884984025559 +25.7,4.9,50,22.0,10,SP98,False,True,False,False,-12.0,1.953,0.1906614785992218,0.3723618677042802 +16.0,4.1,40,22.0,10,SP98,False,False,False,False,-12.0,1.953,0.25625,0.5004562499999999 +16.1,4.5,32,22.0,19,SP98,False,False,False,False,-3.0,1.953,0.2795031055900621,0.5458695652173913 +16.0,4.4,40,22.0,7,SP98,True,True,False,False,-15.0,1.953,0.275,0.5370750000000001 +16.0,4.5,41,22.0,11,SP98,False,False,False,False,-11.0,1.953,0.28125,0.54928125 +24.7,4.5,26,22.0,10,SP98,False,False,False,False,-12.0,1.953,0.18218623481781376,0.3558097165991903 +16.0,3.9,42,22.0,8,SP98,False,False,False,False,-14.0,1.953,0.24375,0.47604375 +15.4,4.6,43,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.2987012987012987,0.5833636363636363 +16.0,3.8,40,22.0,8,SP98,False,False,False,False,-14.0,1.953,0.2375,0.4638375 +32.1,4.5,50,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.14018691588785046,0.27378504672897197 +25.9,4.4,40,22.0,14,SP98,False,False,False,False,-8.0,1.953,0.1698841698841699,0.3317837837837838 +48.6,4.3,44,22.0,12,SP98,False,False,False,False,-10.0,1.953,0.08847736625514403,0.1727962962962963 +37.2,4.0,45,22.0,20,SP98,False,False,True,False,-2.0,1.953,0.1075268817204301,0.21 +28.8,3.9,35,22.0,15,SP98,False,False,True,False,-7.0,1.953,0.13541666666666666,0.26446875 +6.7,5.0,30,22.0,17,SP98,False,False,False,False,-5.0,1.953,0.7462686567164178,1.457462686567164 +7.4,4.1,25,22.0,18,SP98,False,False,True,False,-4.0,1.953,0.554054054054054,1.0820675675675675 +17.3,4.1,22,22.0,25,SP98,False,False,True,False,3.0,1.953,0.23699421965317916,0.4628497109826589 +6.6,5.6,43,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.8484848484848485,1.6570909090909092 +14.3,4.1,26,22.0,20,SP98,False,False,False,False,-2.0,1.953,0.28671328671328666,0.5599510489510489 
+13.3,4.6,33,22.0,18,SP98,False,False,False,False,-4.0,1.953,0.3458646616541353,0.6754736842105263 +8.3,4.9,26,22.0,23,SP98,False,False,False,False,1.0,1.953,0.5903614457831325,1.152975903614458 +12.7,4.5,39,22.0,27,SP98,False,False,True,False,5.0,1.953,0.35433070866141736,0.6920078740157481 +16.5,4.1,47,22.0,14,SP98,False,False,False,False,-8.0,1.953,0.24848484848484848,0.4852909090909091 +20.6,4.1,38,22.0,21,SP98,False,False,False,False,-1.0,1.953,0.1990291262135922,0.38870388349514556 +16.3,4.5,58,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.2760736196319018,0.5391717791411043 +18.7,4.2,65,25.0,18,SP98,True,False,True,False,-7.0,1.953,0.22459893048128343,0.43864171122994655 +36.5,3.9,54,23.0,18,SP98,False,False,True,False,-5.0,1.953,0.10684931506849314,0.2086767123287671 +19.0,5.0,35,22.0,15,SP98,True,False,True,False,-7.0,1.953,0.2631578947368421,0.5139473684210526 +16.6,4.4,46,22.0,5,SP98,True,False,False,False,-17.0,1.953,0.26506024096385544,0.5176626506024097 +29.9,4.5,32,22.0,18,SP98,True,False,False,False,-4.0,1.953,0.1505016722408027,0.29392976588628766 +16.0,3.8,42,22.0,11,SP98,False,False,False,False,-11.0,1.953,0.2375,0.4638375 +21.1,5.1,33,22.0,10,SP98,False,True,False,False,-12.0,1.953,0.24170616113744073,0.47205213270142177 +16.0,3.9,40,22.0,10,SP98,False,False,False,False,-12.0,1.953,0.24375,0.47604375 +11.9,5.3,34,22.0,13,SP98,False,False,False,False,-9.0,1.953,0.4453781512605042,0.8698235294117647 +10.1,4.2,35,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.4158415841584159,0.8121386138613862 +31.9,4.3,33,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.13479623824451412,0.2632570532915361 +18.7,4.0,60,22.0,13,SP98,False,False,False,False,-9.0,1.953,0.21390374331550802,0.4177540106951872 +10.8,4.7,48,22.0,17,SP98,False,False,True,False,-5.0,1.953,0.4351851851851852,0.8499166666666667 +19.8,4.0,56,22.0,17,SP98,False,False,True,False,-5.0,1.953,0.20202020202020202,0.39454545454545453 
+11.3,4.3,38,22.0,17,SP98,False,False,False,False,-5.0,1.953,0.3805309734513274,0.7431769911504424 +11.5,5.3,53,22.0,15,SP98,False,False,False,False,-7.0,1.953,0.4608695652173913,0.9000782608695652 +21.4,5.2,51,22.0,13,SP98,False,False,False,False,-9.0,1.953,0.2429906542056075,0.4745607476635515 +32.0,4.9,53,22.0,15,SP98,False,False,False,False,-7.0,1.953,0.153125,0.29905312500000003 +41.9,4.7,53,22.0,14,SP98,False,False,False,False,-8.0,1.953,0.11217183770883056,0.2190715990453461 +211.0,4.6,80,22.0,20,SP98,False,False,False,False,-2.0,1.953,0.021800947867298578,0.042577251184834124 +216.1,5.3,90,22.0,21,SP98,False,False,True,False,-1.0,1.953,0.024525682554372975,0.04789865802869042 +25.5,3.6,27,22.0,27,SP98,False,False,True,False,5.0,1.953,0.1411764705882353,0.2757176470588235 +16.6,4.2,52,22.0,15,SP98,False,True,False,False,-7.0,1.953,0.25301204819277107,0.4941325301204819 +25.9,4.1,43,22.0,24,SP98,False,False,False,False,2.0,1.953,0.1583011583011583,0.30916216216216214 +16.5,4.0,48,22.0,15,SP98,False,False,False,False,-7.0,1.953,0.24242424242424243,0.47345454545454546 +22.1,3.9,29,22.0,26,SP98,False,False,False,False,4.0,1.953,0.1764705882352941,0.3446470588235294 +16.6,3.7,49,22.0,17,E10,False,False,False,False,-5.0,1.823,0.22289156626506024,0.4063313253012048 +15.4,4.5,43,22.0,24,E10,False,False,False,False,2.0,1.823,0.2922077922077922,0.5326948051948052 +16.5,3.9,43,22.0,17,E10,False,False,False,False,-5.0,1.823,0.23636363636363636,0.43089090909090905 +15.4,5.1,21,22.0,24,E10,False,False,False,False,2.0,1.823,0.3311688311688311,0.6037207792207792 +18.4,4.2,59,22.0,24,E10,False,False,False,False,2.0,1.823,0.2282608695652174,0.4161195652173913 +39.5,4.2,58,22.0,25,E10,False,False,False,False,3.0,1.823,0.10632911392405063,0.1938379746835443 +21.5,4.8,47,25.0,27,E10,True,False,True,False,2.0,1.823,0.22325581395348837,0.4069953488372093 +4.8,4.7,26,25.0,18,E10,False,False,False,False,-7.0,1.823,0.9791666666666667,1.7850208333333335 
+100.9,4.7,87,23.5,22,E10,False,False,False,False,-1.5,1.823,0.04658077304261645,0.0849167492566898 +129.7,4.6,58,24.0,21,E10,False,False,False,False,-3.0,1.823,0.035466461063993836,0.06465535851966075 +16.6,3.8,43,21.5,16,E10,False,False,False,False,-5.5,1.823,0.22891566265060237,0.41731325301204814 +15.4,4.2,50,21.5,21,E10,False,False,False,False,-0.5,1.823,0.2727272727272727,0.4971818181818181 +16.0,3.7,45,22.0,17,E10,False,False,False,False,-5.0,1.823,0.23125,0.42156875 +16.0,4.2,41,22.0,15,E10,False,False,False,False,-7.0,1.823,0.2625,0.4785375 +15.4,4.1,45,22.0,24,E10,False,False,False,False,2.0,1.823,0.2662337662337662,0.4853441558441558 +16.0,4.0,43,22.0,16,E10,False,False,False,False,-6.0,1.823,0.25,0.45575 +21.0,3.8,37,22.0,21,E10,False,False,False,False,-1.0,1.823,0.18095238095238095,0.32987619047619043 +15.4,3.8,47,22.0,24,E10,False,False,False,False,2.0,1.823,0.24675324675324672,0.44983116883116875 +16.0,3.8,42,22.0,17,E10,False,False,False,False,-5.0,1.823,0.2375,0.4329625 +16.1,5.1,30,25.0,16,E10,True,True,False,False,-9.0,1.823,0.31677018633540366,0.5774720496894409 +16.0,4.0,42,22.0,15,E10,False,False,False,False,-7.0,1.823,0.25,0.45575 +15.4,4.8,40,22.0,18,E10,False,False,False,False,-4.0,1.823,0.3116883116883117,0.5682077922077922 +17.2,3.9,35,22.0,16,E10,False,False,False,False,-6.0,1.823,0.22674418604651164,0.4133546511627907 +16.6,4.6,50,22.0,25,E10,False,False,False,False,3.0,1.823,0.2771084337349397,0.5051686746987951 +18.3,4.3,46,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.2349726775956284,0.45890163934426226 +16.1,4.8,36,22.0,18,SP98,False,True,False,False,-4.0,1.953,0.29813664596273287,0.5822608695652173 +16.0,4.1,46,22.0,17,SP98,False,False,False,False,-5.0,1.953,0.25625,0.5004562499999999 +16.3,4.0,35,22.0,23,SP98,False,False,False,False,1.0,1.953,0.24539877300613497,0.4792638036809816 +16.0,3.7,44,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.23125,0.45163125000000004 
+18.3,4.3,46,22.0,23,SP98,False,False,False,False,1.0,1.953,0.2349726775956284,0.45890163934426226 +4.5,5.2,23,22.0,23,SP98,False,False,True,False,1.0,1.953,1.1555555555555557,2.2568 +17.0,4.2,48,22.0,16,SP98,False,False,False,False,-6.0,1.953,0.24705882352941178,0.48250588235294123 +6.1,5.0,35,24.5,18,SP98,False,False,False,False,-6.5,1.953,0.819672131147541,1.6008196721311476 +6.1,5.0,33,22.0,24,SP98,False,False,True,False,2.0,1.953,0.819672131147541,1.6008196721311476 +17.3,4.2,36,22.0,22,SP98,False,False,False,False,0.0,1.953,0.24277456647398843,0.4741387283236994 +6.0,4.8,27,22.0,17,SP98,False,False,False,False,-5.0,1.953,0.7999999999999999,1.5624 +16.0,4.1,45,22.0,15,SP98,False,False,False,False,-7.0,1.953,0.25625,0.5004562499999999 +15.4,4.9,55,22.0,18,SP98,False,False,False,False,-4.0,1.953,0.3181818181818182,0.6214090909090909 +15.4,4.3,39,22.0,23,SP98,False,False,False,False,1.0,1.953,0.2792207792207792,0.5453181818181818 +16.0,3.9,35,22.0,18,SP98,False,False,False,False,-4.0,1.953,0.24375,0.47604375 +15.4,4.5,52,22.0,26,SP98,False,False,False,False,4.0,1.953,0.2922077922077922,0.5706818181818182 +16.6,4.1,48,25.5,30,SP98,True,False,True,False,4.5,1.953,0.24698795180722888,0.482367469879518 +15.4,5.0,48,25.5,31,SP98,True,False,False,False,5.5,1.953,0.3246753246753247,0.6340909090909091 +16.0,4.0,43,22.0,21,SP98,True,False,False,False,-1.0,1.953,0.25,0.48825 +15.4,4.5,42,22.0,30,SP98,False,False,False,False,8.0,1.953,0.2922077922077922,0.5706818181818182 +16.0,3.6,43,22.0,19,SP98,False,False,False,False,-3.0,1.953,0.225,0.439425 +20.9,4.1,33,22.0,21,SP98,False,False,False,False,-1.0,1.953,0.19617224880382775,0.3831244019138756 +32.1,3.9,47,22.0,26,SP98,False,False,False,False,4.0,1.953,0.12149532710280372,0.2372803738317757 +15.3,6.6,16,24.0,27,SP98,True,False,False,False,3.0,1.953,0.4313725490196078,0.8424705882352941 +28.5,4.0,44,25.0,23,SP98,True,False,False,False,-2.0,1.953,0.14035087719298245,0.27410526315789474 
+11.9,3.7,33,25.0,19,SP98,False,False,False,False,-6.0,1.953,0.31092436974789917,0.6072352941176471 +14.5,4.3,36,25.0,22,SP98,False,False,False,False,-3.0,1.953,0.296551724137931,0.5791655172413793 +16.6,4.1,49,22.0,26,SP98,False,False,True,False,4.0,1.953,0.24698795180722888,0.482367469879518 +16.2,4.4,39,24.5,17,SP98,True,False,False,False,-7.5,1.953,0.271604938271605,0.5304444444444445 +16.7,3.8,44,24.5,23,SP98,False,False,True,False,-1.5,1.953,0.2275449101796407,0.44439520958083834 +8.3,5.0,52,24.5,27,SP98,False,False,False,False,2.5,1.953,0.6024096385542168,1.1765060240963854 +5.5,3.7,33,24.5,28,SP98,False,False,True,False,3.5,1.953,0.6727272727272727,1.3138363636363637 +13.6,3.7,33,24.5,28,SP98,False,False,True,False,3.5,1.953,0.2720588235294118,0.5313308823529412 +16.0,3.7,39,24.5,18,SP98,False,False,False,False,-6.5,1.953,0.23125,0.45163125000000004 +16.1,4.3,38,25.0,31,SP98,True,False,False,False,6.0,1.953,0.26708074534161486,0.5216086956521738 +16.0,3.8,45,25.0,19,SP98,False,False,False,False,-6.0,1.953,0.2375,0.4638375 +15.4,4.6,42,25.0,31,SP98,True,False,False,False,6.0,1.953,0.2987012987012987,0.5833636363636363 +14.7,5.0,25,25.0,30,SP98,True,False,False,False,5.0,1.953,0.3401360544217687,0.6642857142857143 diff --git a/measurements.csv b/Data/measurements.csv similarity index 100% rename from measurements.csv rename to Data/measurements.csv diff --git a/measurements2.xlsx b/Data/measurements2.xlsx similarity index 100% rename from measurements2.xlsx rename to Data/measurements2.xlsx