diff --git a/consume.ipynb b/consume.ipynb new file mode 100644 index 0000000..f244f6b --- /dev/null +++ b/consume.ipynb @@ -0,0 +1,1428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## COBIFY - Best Fuel" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Let's import the libraries we'll use and the csv file with the data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "from sklearn.preprocessing import StandardScaler\n", + "import statsmodels.formula.api as smf" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data = pd.read_csv('/home/julian/Cursos/Ironhack/Career/reto_data/consume_data.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exploratory Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 376 entries, 0 to 375\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 consume_id 376 non-null int64 \n", + " 1 distance 376 non-null float64\n", + " 2 consume 376 non-null float64\n", + " 3 speed 376 non-null int64 \n", + " 4 temp_inside 376 non-null float64\n", + " 5 temp_outside 376 non-null int64 \n", + " 6 gas_type 376 non-null object \n", + " 7 AC 376 non-null int64 \n", + " 8 rain 376 non-null int64 \n", + " 9 sun 376 non-null int64 \n", + " 10 snow 376 non-null int64 \n", + "dtypes: float64(3), int64(7), object(1)\n", + "memory usage: 32.4+ KB\n" + ] + } + ], + "source": [ + "consume_data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { 
+ "data": { + "text/plain": [ + "consume_id distance consume speed temp_inside temp_outside gas_type AC rain sun snow \n", + "False False False False False False False False False False False 376\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consume_data.isna().value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SP98 219\n", + "E10 157\n", + "Name: gas_type, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consume_data.gas_type.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We can see that all variables are numerical, except for gas_type, which we'll transform into a dummy variable. On the other hand, we don't have null values.\n", + "- We'll also drop the consume_id column as it is an index column.\n", + "- 58% of cars use SP98 gas." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data = pd.get_dummies(consume_data, columns=['gas_type'], drop_first=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data.drop(columns=['consume_id'], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsideACrainsunsnowgas_type_SP98
028.05.02621.51200000
112.04.23021.51300000
211.25.53821.51500000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside AC rain sun snow \\\n", + "0 28.0 5.0 26 21.5 12 0 0 0 0 \n", + "1 12.0 4.2 30 21.5 13 0 0 0 0 \n", + "2 11.2 5.5 38 21.5 15 0 0 0 0 \n", + "\n", + " gas_type_SP98 \n", + "0 0 \n", + "1 0 \n", + "2 0 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consume_data.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MinimumMaximumMeanMedianMode
Consume3.312.24.9252664.74.5
\n", + "
" + ], + "text/plain": [ + " Minimum Maximum Mean Median Mode\n", + "Consume 3.3 12.2 4.925266 4.7 4.5" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consume = {'Minimum': consume_data.consume.min(),\n", + " 'Maximum': consume_data.consume.max(),\n", + " 'Mean': consume_data.consume.mean(),\n", + " 'Median': consume_data.consume.median(),\n", + " 'Mode': consume_data.consume.mode()}\n", + "consume_descr = pd.DataFrame(consume) \n", + "consume_descr = consume_descr.rename(index={0:'Consume'})\n", + "consume_descr" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAASnUlEQVR4nO3df7BdZ13v8feH/uRXTHN7mntiLYGxVmuvLcwRhSq3t6VOp2FIcWjlXpTorWaci6jgEIM44/jHnelU5wJeHZxYsAFrJVawgVYgN/Jj1FKbliohpfZObynxHJLwSyHNQMv53j/2yvTkJDln58fa+6TP+zXT2Xs9+1lrfbMIn73yrLWflapCktSOZ427AEnSaBn8ktQYg1+SGmPwS1JjDH5Jaszp4y5gGOeee26tXr163GVI0inl/vvv/0pVTcxvPyWCf/Xq1ezYsWPcZUjSKSXJF4/U7lCPJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ15pT45e4zzUUXX8LM9PSCfSZXreLhXTtHVJGklhj8YzAzPc21N9+1YJ+7N6wZUTWSWuNQjyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TG9Br8SZYnuSPJF5I8lORlSVYk2Zbkke71nD5rkCQdqu8z/ncBH62qHwQuBR4CNgLbq+pCYHu3LEkakd6CP8ky4BXAewCq6jtV9Q1gLbC567YZuK6vGiRJh+vzjP9FwD7gT5N8NsktSZ4LrKyqGYDu9bwea5AkzdNn8J8OvAR4d1W9GNjPMQzrJFmfZEeSHfv27eurRklqTp/BvxvYXVX3dst3MPgi2JNkEqB73XuklatqU1VNVdXUxMREj2VKUlt6C/6q+jLwpSQXdU1XAbuArcC6rm0dcGdfNUiSDtf3E7jeBNyW5EzgUeAXGHzZbElyI/A4cH3PNUiS5ug1+KvqQWDqCB9d1ed+JUlH5y93JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxvQ9SZuO0/4DB1i2fMWi/SZXreLhXTtHUJGkZwqDf4mq2VmuvfmuRfvdvWHNCKqR9EziUI8kNcbgl6TGGPyS1BiDX5I
aY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxvQ6ZUOSx4BvAt8FnqqqqSQrgA8Aq4HHgBuq6ut91iFJetoozvj/S1VdVlVT3fJGYHtVXQhs75YlSSMyjqGetcDm7v1m4Lox1CBJzeo7+Av4eJL7k6zv2lZW1QxA93pezzVIkuboe1rmy6tqOsl5wLYkXxh2xe6LYj3ABRdc0Fd9ktScXs/4q2q6e90LfAh4KbAnySRA97r3KOtuqqqpqpqamJjos0xJakpvwZ/kuUmef/A98FPATmArsK7rtg64s68aJEmH63OoZyXwoSQH9/PnVfXRJPcBW5LcCDwOXN9jDZKkeXoL/qp6FLj0CO1fBa7qa7+SpIX5y11JaozBL0mNMfglqTEGvyQ1pu8fcD2jXHTxJcxMTy/YZ3LVKh7etXNEFUnSsTP4j8HM9DTX3nzXgn3u3rBmRNVI0vFxqEeSGmPwS1JjDH5JaozBL0mN8eLuKW7/gQMsW75iwT7eaSRpLoP/FFezs95pJOmYONQjSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1pvfgT3Jaks8m+Ui3vCLJtiSPdK/n9F2DJOlpozjj/zXgoTnLG4HtVXUhsL1bliSNSK/Bn+R8YA1wy5zmtcDm7v1m4Lo+a5AkHarvM/53AhuA2TltK6tqBqB7Pe9IKyZZn2RHkh379u3ruUxJakdvwZ/kVcDeqrr/eNavqk1VNVVVUxMTEye5OklqV58PYrkceHWSa4GzgWVJ/gzYk2SyqmaSTAJ7e6xBkjRPb2f8VfW2qjq/qlYDrwP+tqp+FtgKrOu6rQPu7KsGSdLhxnEf/03A1UkeAa7uliVJIzKSZ+5W1SeBT3bvvwpcNYr9SpIO5y93JakxQwV/ksuHaZMkLX3DnvH/7yHbJElL3IJj/EleBrwcmEjyljkfLQNO67MwSVI/Fru4eybwvK7f8+e0/zvw2r6KkiT1Z8Hgr6pPAZ9KcmtVfXFENUmSejTs7ZxnJdkErJ67TlVd2UdRkqT+DBv8fwn8MYNZNr/bXzmSpL4NG/xPVdW7e61EkjQSw97O+eEk/yPJZPcErRVJVvRamSSpF8Oe8R+cVO2tc9oKeNHJLefUt//AAZYtX/g7cf8TT4yoGkk63FDBX1Uv7LuQZ4qaneXam+9asM+WN14xmmIk6QiGCv4kbzhSe1W97+SWI0nq27BDPT865/3ZDGbXfAAw+CXpFDPsUM+b5i4n+R7g/b1UJEnq1fFOy/wEcOHJLESSNBrDjvF/mMFdPDCYnO2HgC19FSVJ6s+wY/y/P+f9U8AXq2p3D/WoB8PcYjq5ahUP79o5oookjdOwY/yfSrKSpy/yPtJfSTrZhrnF9O4Na0ZUjaRxG/YJXDcA/whcD9wA3JvEaZkl6RQ07FDP24Efraq9AEkmgP8D3NFXYZKkfgx7V8+zDoZ+56vHsK4kaQkZ9oz/o0k+BtzeLf8McHc/JUmS+rTYM3e/H1hZVW9N8tPATwAB7gFuW2Tds4FPA2d1+7mjqn6nm9XzAwwe6vIYcENVff0E/xySpCEtNlzzTuCbAFX1wap6S1W9mcHZ/jsXWffbwJVVdSlwGXBNkh8HNgLbq+pCYHu3LEkakcWCf3VV/fP8xqraweCM/ahq4Fvd4hndfwWsBTZ37ZuB646hXknSCVos+M9e4LNnL7bxJKcleRDYC2yrqnsZDB3NAHSv5x1l3fVJdiTZsW/fvsV2JUka0mLBf1+SX5rfmORG4P7FNl5V362qy4DzgZcmuWTYwqpqU1VNVdXUxMTEsKtJkhax2F09vw58KMnreTrop4AzgdcMu5Oq+kaSTwLXAHuSTFbVTJJJBv8akCSNyIJn/FW1p6peDvwugztwHgN+t6peVlVfXmjdJBNJlnfvnw28EvgCsJWnH+W4DrjzBOqXJB2jYefq+QTwiWPc9iSwOclpDL5gtlTVR5LcA2z
phoseZzANhCRpRIb9Adcx6+4GevER2r/K4AlekqQxcNoFSWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5Ia09t9/Dq17D9wgGXLVyzYZ3LVKh7etXNEFUnqi8EvAGp2lmtvvmvBPndvWDOiaiT1yaEeSWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSY3qZlTvJ9wPuA/wjMApuq6l1JVgAfAFYDjwE3VNXX+6pjGBddfAkz09OL9tv/xBMjqEaS+tXnfPxPAb9RVQ8keT5wf5JtwM8D26vqpiQbgY3Ab/ZYx6JmpqcXnYseYMsbr+i/GEnqWW9DPVU1U1UPdO+/CTwEfC+wFtjcddsMXNdXDZKkw41kjD/JauDFwL3AyqqagcGXA3DeUdZZn2RHkh379u0bRZmS1ITegz/J84C/An69qv592PWqalNVTVXV1MTERH8FSlJjeg3+JGcwCP3bquqDXfOeJJPd55PA3j5rkCQdqrfgTxLgPcBDVfW/5ny0FVjXvV8H3NlXDZKkw/V5V8/lwM8Bn0vyYNf2W8BNwJYkNwKPA9f3WIMkaZ7egr+q/g7IUT6+qq/9SpIW5i93JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmN4etq5nnv0HDrBs+YoF+3z7ySc564wzFt3W5KpVPLxr58kqTdIxMPg1tJqd5dqb71qwz5Y3XsFr3vHxRbd194Y1J6ssScfIoR5JakxvwZ/kvUn2Jtk5p21Fkm1JHulez+lr/5KkI+vzjP9W4Jp5bRuB7VV1IbC9W5YkjVBvwV9Vnwa+Nq95LbC5e78ZuK6v/UuSjmzUF3dXVtUMQFXNJDnvaB2TrAfWA1xwwQUjKk+jMswdQt75I/Vjyd7VU1WbgE0AU1NTNeZydJINc4eQd/5I/Rj1XT17kkwCdK97R7x/SWreqIN/K7Cue78OuHPE+5ek5vV5O+ftwD3ARUl2J7kRuAm4OskjwNXdsiRphHob46+q/3qUj67qa5+SpMX5y11JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmOW7MPWpVPVRRdfwsz09IJ9Jlet4uFdO0dUkXQog186yWamp7n25rsW7HP3hjUjqkY6nEM9ktQYz/iljkM0aoXBL3UcolErHOqRpMYY/JLUmLEM9SS5BngXcBpwS1Xd1Ne+hhm33f/EE33tXidg/4EDLFu+YsE+337ySc4644wT7gOn7t+DYf6OD3MMTub1i1FeLxn1tZmTtb9htjPsto7VyIM/yWnAHwFXA7uB+5JsrapdfexvmHHbLW+8oo9d6wTV7OxQ/9u95h0fP+E+B/udiob9O77YMTiZ1y9Geb1k1NdmTtb+htnOsNs6VuMY6nkp8H+r6tGq+g7wF8DaMdQhSU1KVY12h8lrgWuq6he75Z8DfqyqfmVev/XA+m7xIuDhkRY6vHOBr4y7iCXGY3Ioj8ehPB6H6+uYvKCqJuY3jmOMP0doO+zbp6o2AZv6L+fEJNlRVVPjrmMp8ZgcyuNxKI/H4UZ9TMYx1LMb+L45y+cDi1/hkCSdFOMI/vuAC5O8MMmZwOuArWOoQ5KaNPKhnqp6KsmvAB9jcDvne6vq86Ou4yRa8sNRY+AxOZTH41Aej8ON9JiM/OKuJGm8/OWuJDXG4Jekxhj8JyDJaUk+m+Qj465lKUiyPMkdSb6Q5KEkLxt3TeOU5M1JPp9kZ5Lbk5w97ppGLcl7k+xNsnNO24ok25I80r2eM84aR+kox+P3uv/P/HOSDyVZ3ncdBv+J+TXgoXEXsYS8C/hoVf0gcCkNH5s
k3wv8KjBVVZcwuJHhdeOtaixuBa6Z17YR2F5VFwLbu+VW3Mrhx2MbcElV/QjwL8Db+i7C4D9OSc4H1gC3jLuWpSDJMuAVwHsAquo7VfWNsRY1fqcDz05yOvAcGvy9SlV9GvjavOa1wObu/WbgulHWNE5HOh5V9fGqeqpb/AyD3zb1yuA/fu8ENgCzY65jqXgRsA/4027465Ykzx13UeNSVf8K/D7wODAD/FtVLT5TXBtWVtUMQPd63pjrWUr+O/A3fe/E4D8OSV4F7K2q+8ddyxJyOvAS4N1V9WJgP239E/4Q3bj1WuCFwCrguUl+drxVaSlL8nbgKeC2vvdl8B+fy4FXJ3mMweyiVyb5s/GWNHa7gd1VdW+3fAeDL4JWvRL4f1W1r6qeBD4IvHzMNS0Ve5JMAnSve8dcz9glWQe8Cnh9jeDHVQb/caiqt1XV+VW1msEFu7+tqqbP5qrqy8CXklzUNV0F9PKMhVPE48CPJ3lOkjA4Hs1e7J5nK7Cue78OuHOMtYxd92Cq3wReXVUjeRqQD1vXyfQm4LZuDqZHgV8Ycz1jU1X3JrkDeIDBP98/S4NTFSS5HbgCODfJbuB3gJuALUluZPAFef34KhytoxyPtwFnAdsG5wh8pqp+udc6nLJBktriUI8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+NWMJG/o5jz/pyTvT/KCJNu7tu1JLuj63ZrkD5L8Q5JHk7y2a59M8ukkD3Zz7P9k1/6tOft4bZJb52zn3Uk+0W3nP3fzsT90sE/X76eS3JPkgSR/meR5ozwuao/BryYk+WHg7cCVVXUpg2cp/CHwvm4e9NuAP5izyiTwEwzmT7mpa/tvwMeq6jIGzxt4cIhdnwNcCbwZ+DDwDuCHgf+U5LIk5wK/Dbyyql4C7ADecvx/UmlxTtmgVlwJ3FFVXwGoqq91Twj76e7z9wM3z+n/11U1C+xKsrJruw94b5Izus8fHGK/H66qSvI5YE9VfQ4gyeeB1QzmXr8Y+Pvu5/pnAvcc/x9TWpxn/GpFgMXmJ5n7+bfnrXvwIRqvAP4VeH+SNxxhvfmPVzy4ndl525xlcOIVYFtVXdb9d3FV3bjYH0Y6EQa/WrEduCHJf4DBc1+Bf+DpxyG+Hvi7hTaQ5AUMnsPwJwyeNHZw2uk9SX4oybOA1xxjXZ8BLk/y/d0+npPkB45xG9IxcahHTaiqzyf5n8CnknyXwWyZv8pg6OatDJ4etthsolcAb03yJPAt4OAZ/0bgI8CXgJ3A0Bdnq2pfkp8Hbk9yVtf82wyevSr1wtk5JakxDvVIUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSY/w96vxbZX8tNBQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(x='consume', data=consume_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAEGCAYAAABbzE8LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAMVUlEQVR4nO3df6yeZ13H8c93rUA3Q4AOFyyRIzkqAhPE/qGiSOYwDSOgBo0Rs6okxn+6ZiYohCb+UxOiJjpqQoKI7ZBgZPEXZFbKNBLlR+xgusEWPcHyo8IoXUTZJtD18o/zFE+7re1z9jzPt6fn9UqW9tzPfV/X1ft07969m+d+aowRABbviu4FAGxWAgzQRIABmggwQBMBBmiydZqdr7766rG0tDSnpQBcnu68884vjzGeee72qQK8tLSUo0ePzm5VAJtAVX3msba7BQHQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMECTqT4TbqM7cOBAVlZWpj7u+PHjSZIdO3bMbC3Ly8vZs2fPzMYDNp5NFeCVlZXcdc+9eeTKZ0x13JaHvpIk+eLXZnO6tjz0wEzGATa2TRXgJHnkymfk4ee9cqpjtt13e5JMfdyFxgM2N/eAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaLCTABw4cyIEDBxYxFXPiewizt3URk6ysrCxiGubI9xBmzy0IgCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQSYmTp58mRuuummnDx5cqrX1lpZWckNN9yQlZWVuaxjFuNPM9969puHrrnnPe9GHl+AmalDhw7l7rvvzq233jrVa2vt378/Dz74YPbv3z+Xdcxi/GnmW89+89A197zn3cjjCzAzc/LkyRw+fDhjjBw+fPisK4bzvbbWyspKjh07liQ5duzYuq5SzzfXLMafZr717DcPXXPPe96NPv7WmY72OI4fP56HH344e/fuXcR0j2tlZSVXfH20riFJrvjf/87Kyv+0n49prKysZNu2befd59ChQzl9+nSS5JFHHsmtt96am2+++YKvrXXuVen+/ftz8ODBqdZ6vrlmMf40861nv3nomnve82708S94BVxVv1JVR6vq6IkTJ2Y2MZefD37wgzl16lSS5NSpUzly5MhFvbbWmavTx/v6ia5jFuNPM9969puHrrnnPe9GH/+CV8BjjLcneXuS7Ny5c12Xjzt27EiS3HLLLe
s5fGb27t2bOz99f+sakuT0U56a5ede034+pnExV+vXX399br/99pw6dSpbt27NK17xiot6ba2lpaWzori0tDT1Ws831yzGn2a+9ew3D11zz3vejT6+e8DMzO7du3PFFau/pbZs2ZIbb7zxol5ba9++fef9+omuYxbjTzPfevabh6655z3vRh9fgJmZ7du3Z9euXamq7Nq1K9u3b7+o19ZaXl7+5lXp0tJSlpeXZ7qOWYw/zXzr2W8euuae97wbfXwBZqZ2796da6+99jGvFM732lr79u3LVVdd9YSuTs831yzGn2a+9ew3D11zz3vejTx+jXHxt3V37tw5jh49OvUkZ+4fdt/zPHMP+OHnvXKq47bdd3uSTH3c+cb7gQ16D3gjrRkuFVV15xhj57nbXQEDNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZosnURkywvLy9iGubI9xBmbyEB3rNnzyKmYY58D2H23IIAaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQJOt3QtYtC0PPZBt990+5TEnk2Tq4863huSamYwFbFybKsDLy8vrOu748VNJkh07ZhXNa9a9FuDysakCvGfPnu4lAHyTe8AATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJjXGuPidq04k+cz8lvOEXJ3ky92LuIQ4H4/mnJzN+TjbPM/Hc8YYzzx341QBvpRV1dExxs7udVwqnI9Hc07O5nycreN8uAUB0ESAAZpcTgF+e/cCLjHOx6M5J2dzPs628PNx2dwDBthoLqcrYIANRYABmlwWAa6qLVX1iap6f/daLgVV9bSquq2q7quqe6vqh7rX1Kmqbq6qT1bVPVX1nqp6SveaFq2q3llVX6qqe9Zse0ZVHamqf5/8+PTONS7S45yP35n8P/OvVfUXVfW0ea/jsghwkr1J7u1exCXkliSHxxjPS/KibOJzU1U7ktyUZOcY44VJtiT5ud5VtTiYZNc5296Y5I4xxncluWPy9WZxMI8+H0eSvHCM8X1J/i3Jm+a9iA0f4Kp6dpIbkryjey2Xgqp6apKXJfmjJBljfH2M8V+ti+q3Ncm2qtqa5Mok/9m8noUbY3woyQPnbH5NkkOTnx9K8pOLXFOnxzofY4wPjDFOTb78aJJnz3sdGz7ASX4/ya8nOd28jkvFc5OcSPLHk9sy76iqq7oX1WWMcTzJ7yb5bJIvJPnKGOMDvau6ZFwzxvhCkkx+/Lbm9VxKfjnJ38x7kg0d4Kp6VZIvjTHu7F7LJWRrkpckedsY4/uTPJjN9VfLs0zua74myXcm+fYkV1XVL/SuiktZVb05yakk7573XBs6wElemuTVVXUsyZ8mua6q/qR3Se0+n+TzY4yPTb6+LatB3qyuT/IfY4wTY4xvJPnzJD/cvKZLxf1V9awkmfz4peb1tKuq3UleleR1YwFvktjQAR5jvGmM8ewxxlJW/2Hl78YYm/rqZozxxSSfq6rvmWz68SSfalxSt88m+cGqurKqKqvnY9P+o+Q5/jrJ7snPdyf5q8a1tKuqXUl+I8mrxxgPLWLOrYuYhIXbk+TdVfWkJJ9O8kvN62kzxvhYVd2W5ONZ/WvlJ7IJ34JbVe9J8vIkV1fV55P8ZpK3JPmzqnp9Vv
+g+pm+FS7W45yPNyV5cpIjq39W56NjjF+d6zq8FRmgx4a+BQGwkQkwQBMBBmgiwABNBBigiQADNBFggCYCzMJV1Y2TZ67+S1W9q6qeU1V3TLbdUVXfMdnvYFW9tao+XFWfrqrXTrY/q6o+VFV3TZ7x+6OT7V9dM8drq+rgmnHeVlV/PxnnxybPg733zD6T/X6iqj5SVR+vqvdW1bcu8ryw+QgwC1VVL0jy5iTXjTFelNVnOf9Bklsnz2F9d5K3rjnkWUl+JKvvz3/LZNvPJ/nbMcaLs/q847suYuqnJ7kuyc1J3pfk95K8IMm1VfXiqro6yb4k148xXpLkaJJfW/+vFC7MW5FZtOuS3DbG+HKSjDEemHxix09PXn9Xkt9es/9fjjFOJ/lUVV0z2fbPSd5ZVd8yef2ui5j3fWOMUVV3J7l/jHF3klTVJ5MsZfXZr89P8k+Tt6E+KclH1v/LhAtzBcyiVZILvf997etfO+fYMw/TflmS40neVVU3PsZx537s0JlxTp8z5umsXohUkiNjjBdP/nv+GOP1F/rFwBMhwCzaHUl+tqq2J6ufS5bkw/n/jwl6XZJ/PN8AVfWcrD4H+g+z+skfZx63eX9VfW9VXZHkp6Zc10eTvLSqlidzXFlV3z3lGDAVtyBYqDHGJ6vqt5L8Q1U9ktWnk92U1VsKb8jqp3lc6OltL0/yhqr6RpKvJjlzBfzGJO9P8rkk9yS56H9EG2OcqKpfTPKeqnryZPO+rH42GMyFp6EBNHELAqCJAAM0EWCAJgIM0ESAAZoIMEATAQZo8n+acomzw5dLOAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=consume_data.consume)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- From the histogram and boxplot, we can observe that the `consume` variable has some outliers, which we'll have to take into account when doing the regression analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "corr_matrix = consume_data.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsideACrainsunsnowgas_type_SP98
distance1.000000-0.1313790.5655050.0753050.0867740.019030-0.0307840.078627-0.034428-0.047877
consume-0.1313791.000000-0.234537-0.161991-0.3187580.0894720.251756-0.1691730.072308-0.020928
speed0.565505-0.2345371.0000000.0597250.019327-0.024485-0.0041730.0771810.032500-0.099495
temp_inside0.075305-0.1619910.0597251.0000000.3613080.313197-0.0382690.2461200.0971560.010340
temp_outside0.086774-0.3187580.0193270.3613081.0000000.180096-0.1847630.349988-0.1629480.156990
AC0.0190300.089472-0.0244850.3131970.1800961.0000000.2421970.0859280.0631700.096943
rain-0.0307840.251756-0.004173-0.038269-0.1847630.2421971.000000-0.1119160.2777390.052789
sun0.078627-0.1691730.0771810.2461200.3499880.085928-0.1119161.000000-0.0310830.038120
snow-0.0344280.0723080.0325000.097156-0.1629480.0631700.277739-0.0310831.0000000.035231
gas_type_SP98-0.047877-0.020928-0.0994950.0103400.1569900.0969430.0527890.0381200.0352311.000000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside \\\n", + "distance 1.000000 -0.131379 0.565505 0.075305 0.086774 \n", + "consume -0.131379 1.000000 -0.234537 -0.161991 -0.318758 \n", + "speed 0.565505 -0.234537 1.000000 0.059725 0.019327 \n", + "temp_inside 0.075305 -0.161991 0.059725 1.000000 0.361308 \n", + "temp_outside 0.086774 -0.318758 0.019327 0.361308 1.000000 \n", + "AC 0.019030 0.089472 -0.024485 0.313197 0.180096 \n", + "rain -0.030784 0.251756 -0.004173 -0.038269 -0.184763 \n", + "sun 0.078627 -0.169173 0.077181 0.246120 0.349988 \n", + "snow -0.034428 0.072308 0.032500 0.097156 -0.162948 \n", + "gas_type_SP98 -0.047877 -0.020928 -0.099495 0.010340 0.156990 \n", + "\n", + " AC rain sun snow gas_type_SP98 \n", + "distance 0.019030 -0.030784 0.078627 -0.034428 -0.047877 \n", + "consume 0.089472 0.251756 -0.169173 0.072308 -0.020928 \n", + "speed -0.024485 -0.004173 0.077181 0.032500 -0.099495 \n", + "temp_inside 0.313197 -0.038269 0.246120 0.097156 0.010340 \n", + "temp_outside 0.180096 -0.184763 0.349988 -0.162948 0.156990 \n", + "AC 1.000000 0.242197 0.085928 0.063170 0.096943 \n", + "rain 0.242197 1.000000 -0.111916 0.277739 0.052789 \n", + "sun 0.085928 -0.111916 1.000000 -0.031083 0.038120 \n", + "snow 0.063170 0.277739 -0.031083 1.000000 0.035231 \n", + "gas_type_SP98 0.096943 0.052789 0.038120 0.035231 1.000000 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "corr_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "mascara = np.triu(np.ones_like(corr_matrix, dtype=bool))\n", + "color_map = sns.diverging_palette(0, 10, as_cmap=True) \n", + "\n", + "sns.heatmap(corr_matrix, \n", + " mask=mascara,\n", + " cmap=\"viridis\",\n", + " vmax=1,\n", + " vmin=-1,\n", + " square=True, \n", + " linewidth=.5, \n", + " cbar_kws={\"shrink\": .5} \n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We can see through the correlations heatmap that `consume` is mostly correlated with `temp_outside` (negatively), `rain` (positively), `speed` (negatively), `sun` (negatively), and `temp_inside` (negatively).\n", + "- We can also observe that `speed` and `distance` are correlated, and `temp_inside` with `AC`. Neither is surprising.\n", + "- Next we'll plot some of these variables against `consume`." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x=consume_data.temp_outside, y=consume_data.consume, hue=consume_data.gas_type_SP98);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We can see how lower outside temperatures are associated with higher consumption levels. " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))\n", + "sns.scatterplot(data=consume_data, x=\"temp_inside\", y=\"consume\", hue=\"gas_type_SP98\", ax=axs[0])\n", + "sns.scatterplot(data=consume_data, x=\"temp_inside\", y=\"consume\", hue=\"AC\", ax=axs[1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We can see that for lower inside temperature values, there are higher consumption values. On the other hand, we can see that most of the cars had the AC off, but those with the AC on show higher consumption values." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consume
gas_type_SP98
04.950955
14.906849
\n", + "
" + ], + "text/plain": [ + " consume\n", + "gas_type_SP98 \n", + "0 4.950955\n", + "1 4.906849" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtype_gr = consume_data.groupby('gas_type_SP98').agg({'consume': 'mean'})\n", + "gtype_gr" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consume
AC
04.897391
15.235484
\n", + "
" + ], + "text/plain": [ + " consume\n", + "AC \n", + "0 4.897391\n", + "1 5.235484" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ac_group = consume_data.groupby('AC').agg({'consume': 'mean'})\n", + "ac_group" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consume
rain
04.827576
15.626087
\n", + "
" + ], + "text/plain": [ + " consume\n", + "rain \n", + "0 4.827576\n", + "1 5.626087" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rain_gr = consume_data.groupby('rain').agg({'consume': 'mean'})\n", + "rain_gr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- From the above data frames we can see that the consumption is higher on average when the AC is on and when it is raining.\n", + "- The consumption is also higher on average when the car is using the ethanol type of gas (E10). Although it is higher only by a small proportion, it's considerable given that most of the cars in the data set use non-ethanol gas. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### REGRESSION ANALYSIS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We'll model the `consume` variable as a linear regression over the other variables to check which of them are statistically significant in explaining the car's consumption." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "linear_model = smf.ols(\"consume ~ distance + speed + temp_inside + temp_outside + AC + rain + sun + snow + gas_type_SP98\", data=consume_data).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: consume R-squared: 0.208
Model: OLS Adj. R-squared: 0.188
Method: Least Squares F-statistic: 10.68
Date: Sun, 17 Oct 2021 Prob (F-statistic): 9.76e-15
Time: 20:45:17 Log-Likelihood: -504.20
No. Observations: 376 AIC: 1028.
Df Residuals: 366 BIC: 1068.
Df Model: 9
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Intercept 7.7409 1.186 6.526 0.000 5.408 10.073
distance 0.0018 0.003 0.683 0.495 -0.003 0.007
speed -0.0184 0.004 -4.259 0.000 -0.027 -0.010
temp_inside -0.0776 0.055 -1.414 0.158 -0.185 0.030
temp_outside -0.0394 0.008 -4.873 0.000 -0.055 -0.023
AC 0.4471 0.195 2.298 0.022 0.065 0.830
rain 0.5432 0.162 3.344 0.001 0.224 0.863
sun -0.1210 0.190 -0.636 0.525 -0.495 0.253
snow -0.1020 0.503 -0.203 0.839 -1.091 0.887
gas_type_SP98 -0.0408 0.101 -0.406 0.685 -0.239 0.157
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 248.482 Durbin-Watson: 2.096
Prob(Omnibus): 0.000 Jarque-Bera (JB): 2919.818
Skew: 2.651 Prob(JB): 0.00
Kurtosis: 15.580 Cond. No. 1.37e+03


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.37e+03. This might indicate that there are
strong multicollinearity or other numerical problems." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: consume R-squared: 0.208\n", + "Model: OLS Adj. R-squared: 0.188\n", + "Method: Least Squares F-statistic: 10.68\n", + "Date: Sun, 17 Oct 2021 Prob (F-statistic): 9.76e-15\n", + "Time: 20:45:17 Log-Likelihood: -504.20\n", + "No. Observations: 376 AIC: 1028.\n", + "Df Residuals: 366 BIC: 1068.\n", + "Df Model: 9 \n", + "Covariance Type: nonrobust \n", + "=================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "---------------------------------------------------------------------------------\n", + "Intercept 7.7409 1.186 6.526 0.000 5.408 10.073\n", + "distance 0.0018 0.003 0.683 0.495 -0.003 0.007\n", + "speed -0.0184 0.004 -4.259 0.000 -0.027 -0.010\n", + "temp_inside -0.0776 0.055 -1.414 0.158 -0.185 0.030\n", + "temp_outside -0.0394 0.008 -4.873 0.000 -0.055 -0.023\n", + "AC 0.4471 0.195 2.298 0.022 0.065 0.830\n", + "rain 0.5432 0.162 3.344 0.001 0.224 0.863\n", + "sun -0.1210 0.190 -0.636 0.525 -0.495 0.253\n", + "snow -0.1020 0.503 -0.203 0.839 -1.091 0.887\n", + "gas_type_SP98 -0.0408 0.101 -0.406 0.685 -0.239 0.157\n", + "==============================================================================\n", + "Omnibus: 248.482 Durbin-Watson: 2.096\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 2919.818\n", + "Skew: 2.651 Prob(JB): 0.00\n", + "Kurtosis: 15.580 Cond. No. 1.37e+03\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The condition number is large, 1.37e+03. 
This might indicate that there are\n", + "strong multicollinearity or other numerical problems.\n", + "\"\"\"" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linear_model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- From the summary we can see that a linear model does not model the consumption very well, as we got an $R^2$ of 0.208. \n", + "- We can see from the p-values that `distance` and `temp_inside` are not significant, which is no surprise given the correlation they have with `speed` and `AC` respectively.\n", + "- We can also see that `sun`, `snow` and `gas_type_SP98` are non-significant.\n", + "- We'll perform another regression, but this time without `distance`. We'll log-transform the target variable to see if we can reduce the potential influence of outliers and standardize the data set." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data['log_consume'] = np.log(consume_data.consume)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEHCAYAAACp9y31AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAATuklEQVR4nO3dfbCedX3n8fcHYoxbsEBzksnGpNEuPnRsAfdQNbidVkpL3Z1Cu0K662Lqsht2Wh2sO63Ufeh0+o+722nZdVcl9SndugoilGgtyqYg7aBIUCposDiKkEkgkfqAdkcn5Lt/3FfkkJyTc5+Q637I7/2auee+r999PXyTOdfn/M7vvu7flapCktSOk8ZdgCRptAx+SWqMwS9JjTH4JakxBr8kNWbZuAsYxsqVK2vDhg3jLkOSpsrdd9/99aqaObx9KoJ/w4YN7Ny5c9xlSNJUSfK1+dod6pGkxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/I1Yu249SZb8WLtu/bhLl3ScTcWUDXr69ux+mE3X3LHk7a69YmMP1UgaJ3v8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhrTa/AnOS3J9UnuT7IrycuTnJHkliQPdM+n91mDJOmp+u7x/3fg5qp6IXAWsAu4CthRVWcCO7plSdKI9Bb8SZ4N/DTwboCq+n5VfRO4CNjWrbYNuLivGiRJR+qzx/88YD/w3iSfS/KuJD8ErK6qvQDd86oea5AkHabP4F8GvAR4R1WdA3yXJQzrJNmSZGeSnfv37++rRklqTp/BvxvYXVV3dsvXM/hF8GiSNQDd8775Nq6qrVU1W1WzMzMzPZYpSW3pLfir6hHg4SQv6JrOB74IbAc2d22bgZv6qkGSdKS+78D1BuD9SZYDXwFex+CXzXVJLgceAi7puQZJ0hy9Bn9V3QPMzvPW+X0eV5K0ML+5K0mNMfglqTEGvyQ1xuCXpMYY/Dru1q5bT5IlPdauWz/usqVm9H05pxq0Z/fDbLrmjiVtc+0VG3uqRtLh7PFLUmMMfklqjMEvSY0x+CWpMQa/JDXGq3p0dCctI8m4q5B0HBn8OrqDB7w0UzrBONQjSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTG9TtmQ5EHgceAJ4EBVzSY5A7gW2AA8CFxaVd/osw5J0pNG0eP/2ao6u6pmu+WrgB1VdSawo1uWJI3IOIZ6LgK2da+3ARePoQZJalbfwV/AJ5LcnWRL17a6qvYCdM+req5BkjRH39Myn1dVe5KsAm5Jcv+wG3a/KLYArF+/vq/6JKk5vfb4q2pP97wPuBH4KeDRJGsAuud9C2y7tapmq2p2ZmamzzIlqSm9BX+SH0py6qHXwM8D9wHbgc3dapuBm/qqQZJ0pD6HelYDN3a37VsG/J+qujnJXcB1SS4HHgIu6bEGSdJhegv+qvoKcNY87Y8B5/d1XEnS0fnNXUlqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5Ia03vwJzk5yeeSfLRbPiPJLUke6J5P77sGSdKTRtHjvxLYNWf5KmBHVZ0J7OiWJUkj0mvwJ3kO8E+Bd81pvgjY1r3eBlzcZw2SpKfqu8d/NfDbwME5baurai9A97xqvg2TbEmyM8nO/fv391ymJLWjt+BP8s+AfVV197FsX1Vbq2q2qmZnZmaOc3WS1K5lPe77POCXkrwKWAE8O8mfAo8mWVNVe5OsAfb1WIMk6TC99fir6neq6jlVtQH4VeAvq+pfAduBzd1qm4Gb+qpBknSkcVzH/1bggiQPABd0y5KkEelzqOcHquo24Lbu9WPA+aM4riT
pSH5zV5IaM1TwJzlvmDZJ0uQbtsf/tiHbJEkT7qhj/EleDmwEZpK8ac5bzwZO7rMwSVI/FvtwdzlwSrfeqXPavw28uq+iJEn9OWrwV9UngU8meV9VfW1ENUmSejTs5ZzPTLIV2DB3m6p6ZR9FSZL6M2zwfwh4J4NZNp/orxxJUt+GDf4DVfWOXiuRJI3EsJdzfiTJrydZ091B64wkZ/RamSSpF8P2+A9NqvZbc9oKeN7xLUfDWLtuPXt2PzzuMiRNqaGCv6qe23chGt6e3Q+z6Zo7lrTNtVds7KkaSdNmqOBP8tr52qvqT45vOZKkvg071HPunNcrGMyu+VnA4JekKTPsUM8b5i4n+WHgf/dSUWMcr5c0asc6H//fA2cez0Ja5Xi9pFEbdoz/Iwyu4oHB5GwvAq7rqyhJUn+G7fH/wZzXB4CvVdXuHuqRJPVsqC9wdZO13c9ghs7Tge/3WZQkqT/D3oHrUuAzwCXApcCdSZyWWZKm0LBDPf8BOLeq9gEkmQH+L3B9X4VJkvox7Fw9Jx0K/c5jS9hWkjRBhu3x35zk48AHuuVNwMf6KUmS1KfF7rn7j4DVVfVbSX4FeAUQ4FPA+xfZdgVwO/DM7jjXV9XvdrN6Xsvgpi4PApdW1Tee5r9DkjSkxYZrrgYeB6iqG6rqTVX1mwx6+1cvsu33gFdW1VnA2cCFSV4GXAXsqKozgR3dsiRpRBYL/g1V9fnDG6tqJ4Me+4Jq4Dvd4jO6RwEXAdu69m3AxUuoV5L0NC0W/CuO8t6zFtt5kpOT3APsA26pqjsZDB3tBeieVy2w7ZYkO5Ps3L9//2KHmghr160nyZIekjRqi324e1eSf1tVfzy3McnlwN2L7byqngDOTnIacGOSFw9bWFVtBbYCzM7O1iKrTwTn3ZE0DRYL/jcyCOzX8GTQzwLLgV8e9iBV9c0ktwEXAo8mWVNVe5OsYfDXgCRpRI461FNVj1bVRuD3GFyB8yDwe1X18qp65GjbJpnpevokeRbwcwymfdjOk7dy3Azc9DTqlyQt0bDz8d8K3LrEfa8BtiU5mcEvmOuq6qNJPgVc1w0XPcRgGghJ0ogc63z8i+quBjpnnvbHGNzBS5I0Bk67IEmNMfglqTEGvyQ1xuCXpMYY/JoMJy1b8reely1fseRtkrB23fpx/2ulsertqh5pSQ4eOKZvPS91m0PbSS2zxy9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mN6S34k6xLcmuSXUm+kOTKrv2MJLckeaB7Pr2vGiRJR+qzx38A+PdV9SLgZcBvJPlx4CpgR1WdCezoliVJI9Jb8FfV3qr6bPf6cWAXsBa4CNjWrbYNuLivGiRJRxrJGH+SDcA5wJ3A6qraC4NfDsCqBbbZkmRnkp379+8fRZmS1ITegz/JKcCHgTdW1beH3a6qtlbVbFXNzszM9FegJDWm1+BP8gwGof/+qrqha340yZru/TXAvj5rkCQ9VZ9X9QR4N7Crqv5wzlvbgc3d683ATX3VIEk60rIe930ecBlwb5J7ura3AG8FrktyOfAQcEmPNUiSDtNb8FfVXwNZ4O3z+zquJOno/OauJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+aQhr160nyZIea9etH3fZ0rz6nI9fOmHs2f0wm665Y0nbXHvFxp6qkZ4ee/yS1Bh7/GrPScsY3BlUapPBr/YcPOCwjZrmUI8kNcbgl6TGGPyS1BiDX5IaY/BLUmN6C/4k70myL8l9c9rOSHJLkge659P7Or4kaX599vjfB1x4WNtVwI6qOhPY0S1Lkkaot+CvqtuBvzus+SJgW/d6G3BxX8eXJM1v1GP8q6tqL0D3vGqhFZNsSbIzyc79+/ePrEBJOtFN7Ie
7VbW1qmaranZmZmbc5UjSCWPUwf9okjUA3fO+ER9fkpo36uDfDmzuXm8Gbhrx8SWpeX1ezvkB4FPAC5LsTnI58FbggiQPABd0y5KkEeptds6q+hcLvHV+X8eUJC1uYj/clST1w+CXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMM/nmsXbeeJEt+SNI06G1a5mm2Z/fDbLrmjiVvd+0VG3uoRpKOL3v8ktQYg19q1LEMaa5dt37cZes4cKhHatSxDGk6nHlisMcvSY0x+CWpMQa/1JeTlo1kDH3SLz/2s4TJ4xi/1JeDB0Yyhj7plx/7WcLksccvSY0ZS/AnuTDJl5J8OclVfR7rWP7MlMbmGIaH9KRjOd+XLV8xsdv0New18qGeJCcD/wu4ANgN3JVke1V9sY/j+WempsqIhodOVMd6vk/qNoe2O97G0eP/KeDLVfWVqvo+8EHgojHUIUlNSlWN9oDJq4ELq+rfdMuXAS+tqtcftt4WYEu3+ALgS0/z0CuBrz/NfYzDtNYN01v7tNYN01v7tNYNk137j1bVzOGN47iqZ75BySN++1TVVmDrcTtosrOqZo/X/kZlWuuG6a19WuuG6a19WuuG6ax9HEM9u4F1c5afA+wZQx2S1KRxBP9dwJlJnptkOfCrwPYx1CFJTRr5UE9VHUjyeuDjwMnAe6rqCyM49HEbNhqxaa0bprf2aa0bprf2aa0bprD2kX+4K0kaL7+5K0mNMfglqTEnVPAneU+SfUnuW+D91yT5fPe4I8lZo65xPovVPWe9c5M80X0XYiIMU3uSn0lyT5IvJPnkKOtbyBA/Kz+c5CNJ/qar+3WjrnEhSdYluTXJrq62K+dZJ0n+RwbTonw+yUvGUethNQ1T96Seo4vWPmfdiTtPj1BVJ8wD+GngJcB9C7y/ETi9e/2LwJ3jrnmYurt1Tgb+EvgY8Opx17yE//PTgC8C67vlVeOueci63wL8l+71DPB3wPJx193VswZ4Sff6VOBvgR8/bJ1XAX/B4HszL5uEn/Uh657Uc3TR2rv3JvI8PfxxQvX4q+p2BifoQu/fUVXf6BY/zeA7BGO3WN2dNwAfBvb1X9Hwhqj9XwI3VNVD3foTUf8QdRdwagazoJ3SrXtgFLUtpqr2VtVnu9ePA7uAtYetdhHwJzXwaeC0JGtGXOpTDFP3BJ+jw/yfw4Sep4c7oYJ/iS5n0COaeEnWAr8MvHPctRyD5wOnJ7ktyd1JXjvugob0P4EXMfhy4b3AlVV1cLwlHSnJBuAc4M7D3loLPDxneTfzB9VYHKXuuSbyHF2o9mk6T5u8EUuSn2XwQ/WKcdcypKuBN1fVE5m+aXiXAf8YOB94FvCpJJ+uqr8db1mL+gXgHuCVwI8BtyT5q6r69lirmiPJKQx6l2+cp66hpkYZh0XqPrTORJ6ji9R+NVNynjYX/El+EngX8ItV9di46xnSLPDB7odpJfCqJAeq6s/GWtVwdgNfr6rvAt9NcjtwFoMx0kn2OuCtNRi4/XKSrwIvBD4z3rIGkjyDQQC9v6pumGeViZwaZYi6J/YcHaL2qTlPmxrqSbIeuAG4bAp6nD9QVc+tqg1VtQG4Hvj1SfxhWsBNwD9JsizJPwBeymB8dNI9xOCvFJKsZjBD7FfGWlGn+9zh3cCuqvrDBVbbDry2u7rnZcC3qmrvyIqcxzB1T+o5Okzt03SenlA9/iQfAH4GWJlkN/C7wDMAquqdwH8GfgR4e/db+UBNwKx6Q9Q9sRarvap2JbkZ+DxwEHhXVR31stVRGOL//PeB9yW5l8GwyZuralKm3j0PuAy4N8k9XdtbgPXwg/o/xuDKni8Df8/gL5hxG6buiTxHGa72qeGUDZLUmKaGeiRJBr8kNcfgl6TGGPyS1BiDX5IaY/BLUmMMfk29JN8Zdw3SNDH4JakxBr9OGN30BP8tyX1J7k2yqWs/KcnbuxtofDT
Jx452k4zuRhp3dDdh+UySU5OsSPLebr+f6yYRI8mvJbkhyc1JHkjyX7v2k5O8b04tv9m135Zktnu9MsmDc/bzZxnc/OWrSV6f5E3dsT6d5IxuvR/rjnV3kr9K8sJe/1N1QjqhpmxQ834FOJvBJHArgbu6SeHOAzYAPwGsYjBX0Hvm20GS5cC1wKaquivJs4H/B1wJUFU/0YXtJ5I8v9vsbAbT9H4P+FKSt3XHWVtVL+72e9oQ9b+4288KBlMtvLmqzknyR8BrGcz+uBX4d1X1QJKXAm9nMIOoNDSDXyeSVwAfqKongEczuM3juV37h7r59B9JcutR9vECYG9V3QVwaOrdJK8A3ta13Z/kawzuNQCwo6q+1a33ReBHgS8Az+t+Cfw58Ikh6r+1u8nH40m+BXyka78X+MluSuCNwIfmTPv7zCH2Kz2Fwa8TyUKToC9lcvQw/7z1R9vH9+a8fgJYVlXfyOB+sb8A/AZwKfCvGdzF69AQ64qj7OfgnOWDDM7Vk4BvVtXZi/8zpIU5xq8Tye3Apm58fYbBfXU/A/w18M+7sf7VDGblXMj9wD9Mci5AN76/rNv3a7q25zOYlfFLC+0kyUrgpKr6MPCfGNzfF+BBBjemAVjSzbi7vz6+muSS7hjJhNyMXNPFHr9OJDcCLwf+hkGv/ber6pEkH2Ywt/59DG4Acyfwrfl2UFXf7z4UfluSZzEY3/85BmPp7+ymaT4A/FpVfS8L32lpLfDeJIc6V7/TPf8BcF2SyxjclHupXgO8I8l/ZDCN9Ae7f680NKdlVhOSnFJV30nyIwz+Cjivqh4Zd13SONjjVys+2l1Zsxz4fUNfLbPHr2YluRF47mHNb66qj4+jHmlUDH5JaoxX9UhSYwx+SWqMwS9JjTH4Jakx/x8BanyTRHWxPwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(consume_data.log_consume);" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "scaler = StandardScaler()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "consume_data_scaled=pd.DataFrame(scaler.fit_transform(consume_data), columns=consume_data.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "linear_model_2 = smf.ols(\"log_consume ~ speed + temp_outside + AC + rain + sun + snow + gas_type_SP98\", data=consume_data).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: log_consume R-squared: 0.229
Model: OLS Adj. R-squared: 0.214
Method: Least Squares F-statistic: 15.62
Date: Sun, 17 Oct 2021 Prob (F-statistic): 6.11e-18
Time: 20:45:18 Log-Likelihood: 163.29
No. Observations: 376 AIC: -310.6
Df Residuals: 368 BIC: -279.1
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Intercept 1.7727 0.032 56.115 0.000 1.711 1.835
speed -0.0026 0.001 -4.271 0.000 -0.004 -0.001
temp_outside -0.0085 0.001 -6.511 0.000 -0.011 -0.006
AC 0.0664 0.032 2.101 0.036 0.004 0.128
rain 0.0958 0.027 3.500 0.001 0.042 0.150
sun -0.0320 0.032 -1.005 0.316 -0.095 0.031
snow -0.0436 0.084 -0.521 0.602 -0.208 0.121
gas_type_SP98 -0.0095 0.017 -0.563 0.574 -0.043 0.024
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 111.195 Durbin-Watson: 2.112
Prob(Omnibus): 0.000 Jarque-Bera (JB): 366.740
Skew: 1.317 Prob(JB): 2.31e-80
Kurtosis: 7.059 Cond. No. 468.


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: log_consume R-squared: 0.229\n", + "Model: OLS Adj. R-squared: 0.214\n", + "Method: Least Squares F-statistic: 15.62\n", + "Date: Sun, 17 Oct 2021 Prob (F-statistic): 6.11e-18\n", + "Time: 20:45:18 Log-Likelihood: 163.29\n", + "No. Observations: 376 AIC: -310.6\n", + "Df Residuals: 368 BIC: -279.1\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "=================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "---------------------------------------------------------------------------------\n", + "Intercept 1.7727 0.032 56.115 0.000 1.711 1.835\n", + "speed -0.0026 0.001 -4.271 0.000 -0.004 -0.001\n", + "temp_outside -0.0085 0.001 -6.511 0.000 -0.011 -0.006\n", + "AC 0.0664 0.032 2.101 0.036 0.004 0.128\n", + "rain 0.0958 0.027 3.500 0.001 0.042 0.150\n", + "sun -0.0320 0.032 -1.005 0.316 -0.095 0.031\n", + "snow -0.0436 0.084 -0.521 0.602 -0.208 0.121\n", + "gas_type_SP98 -0.0095 0.017 -0.563 0.574 -0.043 0.024\n", + "==============================================================================\n", + "Omnibus: 111.195 Durbin-Watson: 2.112\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 366.740\n", + "Skew: 1.317 Prob(JB): 2.31e-80\n", + "Kurtosis: 7.059 Cond. No. 
468.\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linear_model_2.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- This time we got an $R^2$ of 0.229, a slight improvement.\n", + "- We can see from the p-values that `sun`, `snow` and `gas_type` are still non-significant." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### From the regression analysis, we can say that the variables which are significant for predicting mean consumption are `rain`, `AC`, `temp_outside` and `speed`, which is in line with the correlation analysis." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CONSUME COST" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We'll enrich the data with the price of the gas." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "def price(x):\n", + " E10_price = 1.379\n", + " SP98_price = 1.459\n", + " if x == 0:\n", + " return E10_price\n", + " else:\n", + " return SP98_price" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data['price'] = consume_data.gas_type_SP98.apply(price)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "consume_data['cost_per_distance'] =(((consume_data.consume)/100) * (consume_data.distance)) * consume_data.price " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cost_per_distance
gas_type_SP98
01.396125
11.305041
\n", + "
" + ], + "text/plain": [ + " cost_per_distance\n", + "gas_type_SP98 \n", + "0 1.396125\n", + "1 1.305041" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "price_gr = consume_data.groupby('gas_type_SP98').agg({'cost_per_distance': 'mean'})\n", + "price_gr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- From the above data frame we can see that the rows with the ethanol gas type (E10, `gas_type_SP98 = 0`) have a higher mean `cost_per_distance` than the SP98 rows, even though the price of E10 is lower. Note that `cost_per_distance` is computed as `consume / 100 * distance * price`, so it scales with the distance of each row and this mean also reflects how far was driven, not only the fuel itself." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ironcon", + "language": "python", + "name": "ironcon" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}