Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
687 changes: 687 additions & 0 deletions Code/0.Exploration.ipynb

Large diffs are not rendered by default.

343 changes: 343 additions & 0 deletions Code/1.Transformation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"# All the libraries we are using:\n",
"import pandas as pd\n",
"import numpy as np\n",
"import requests\n",
"from bs4 import BeautifulSoup\n"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# Load the cleaned measurements into a DataFrame.\n",
"# The last cell of this notebook overwrites this same file with index=False, so on a\n",
"# re-run the 'Unnamed: 0' index column is no longer in the file. Only promote it to\n",
"# the index when it is actually present, so that Restart & Run All keeps working.\n",
"df = pd.read_csv('../Data/cleaned_mes.csv')\n",
"if 'Unnamed: 0' in df.columns:\n",
"    # rename_axis(None) leaves the index unnamed, as read_csv(index_col='Unnamed: 0') does.\n",
"    df = df.set_index('Unnamed: 0').rename_axis(None)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>distance</th>\n",
" <th>consume</th>\n",
" <th>speed</th>\n",
" <th>temp_inside</th>\n",
" <th>temp_outside</th>\n",
" <th>gas_type</th>\n",
" <th>AC</th>\n",
" <th>rain</th>\n",
" <th>sun</th>\n",
" <th>snow</th>\n",
" <th>temp_gradient</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>313</th>\n",
" <td>11.3</td>\n",
" <td>4.3</td>\n",
" <td>38</td>\n",
" <td>22.0</td>\n",
" <td>17</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>15.4</td>\n",
" <td>4.1</td>\n",
" <td>45</td>\n",
" <td>22.0</td>\n",
" <td>24</td>\n",
" <td>E10</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>298</th>\n",
" <td>16.3</td>\n",
" <td>4.5</td>\n",
" <td>58</td>\n",
" <td>22.0</td>\n",
" <td>16</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>12.3</td>\n",
" <td>5.2</td>\n",
" <td>55</td>\n",
" <td>21.5</td>\n",
" <td>12</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-9.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>279</th>\n",
" <td>24.7</td>\n",
" <td>4.5</td>\n",
" <td>26</td>\n",
" <td>22.0</td>\n",
" <td>10</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>5.3</td>\n",
" <td>4.1</td>\n",
" <td>34</td>\n",
" <td>21.5</td>\n",
" <td>9</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-12.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>187</th>\n",
" <td>12.3</td>\n",
" <td>4.8</td>\n",
" <td>41</td>\n",
" <td>22.5</td>\n",
" <td>7</td>\n",
" <td>E10</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-15.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>16.2</td>\n",
" <td>5.2</td>\n",
" <td>29</td>\n",
" <td>21.0</td>\n",
" <td>0</td>\n",
" <td>E10</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>11.8</td>\n",
" <td>4.3</td>\n",
" <td>37</td>\n",
" <td>20.0</td>\n",
" <td>6</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-14.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>309</th>\n",
" <td>31.9</td>\n",
" <td>4.3</td>\n",
" <td>33</td>\n",
" <td>22.0</td>\n",
" <td>16</td>\n",
" <td>SP98</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>-6.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" distance consume speed temp_inside temp_outside gas_type AC \\\n",
"313 11.3 4.3 38 22.0 17 SP98 False \n",
"339 15.4 4.1 45 22.0 24 E10 False \n",
"298 16.3 4.5 58 22.0 16 SP98 False \n",
"70 12.3 5.2 55 21.5 12 SP98 False \n",
"279 24.7 4.5 26 22.0 10 SP98 False \n",
"100 5.3 4.1 34 21.5 9 SP98 False \n",
"187 12.3 4.8 41 22.5 7 E10 False \n",
"179 16.2 5.2 29 21.0 0 E10 False \n",
"90 11.8 4.3 37 20.0 6 SP98 False \n",
"309 31.9 4.3 33 22.0 16 SP98 False \n",
"\n",
" rain sun snow temp_gradient \n",
"313 False False False -5.0 \n",
"339 False False False 2.0 \n",
"298 False False False -6.0 \n",
"70 False False False -9.5 \n",
"279 False False False -12.0 \n",
"100 False False False -12.5 \n",
"187 False False False -15.5 \n",
"179 False False False -21.0 \n",
"90 False False False -14.0 \n",
"309 False False False -6.0 "
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Since the analysis focuses on price, two new columns will be created: one with the consumption\n",
"# per unit of distance and one with the price per unit of distance.\n",
"# The data has no dates for these trips, so what they cost at the time cannot be inferred; instead,\n",
"# the prices of these gas types as of today (17/03/2022) are used: SP98 1.955€/l, E10 1.825€/l\n",
"# Source: https://www.dieselogasolina.com/\n",
"df.sample(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"# Helper to fetch the current fuel prices; may be useful for future runs of this code.\n",
"def find_price():\n",
"    \"\"\"Scrape the current SP98 and E10 prices from https://www.dieselogasolina.com/.\n",
"\n",
"    Takes the first table on the page and reads the second cell of row 1 (E10)\n",
"    and of row 2 (SP98); the first five characters of each cell are parsed as a\n",
"    number written with a decimal comma.\n",
"\n",
"    Returns:\n",
"        tuple: (price_SP98, price_E10) as floats.\n",
"\n",
"    Raises:\n",
"        requests.RequestException: if the request fails, times out or returns an\n",
"            HTTP error status.\n",
"    \"\"\"\n",
"    # Without a timeout, requests may block forever on an unresponsive server.\n",
"    page = requests.get('https://www.dieselogasolina.com/', timeout=10)\n",
"    # Fail loudly on 4xx/5xx responses instead of trying to parse an error page.\n",
"    page.raise_for_status()\n",
"    soup = BeautifulSoup(page.content, 'html.parser')\n",
"    rows = soup.find('table').find_all('tr')\n",
"    p_E10 = rows[1].find_all('td')[1].text\n",
"    p_SP98 = rows[2].find_all('td')[1].text\n",
"    return float(p_SP98[:5].replace(',', '.')), float(p_E10[:5].replace(',', '.'))"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# Row-level transformation used to build the new columns of the dataframe:\n",
"def df_trans(dis, con, gt, prices=None):\n",
"    \"\"\"Compute the consumption and the price per unit of distance for one row.\n",
"\n",
"    Intended to be applied row by row, e.g.:\n",
"        df[['con_dis', 'price_dis']] = df.apply(\n",
"            lambda x: df_trans(x.distance, x.consume, x.gas_type, prices=[...]), axis=1)\n",
"\n",
"    Args:\n",
"        dis: distance value of the row.\n",
"        con: consume value of the row.\n",
"        gt: gas type of the row, either 'SP98' or 'E10'.\n",
"        prices: sequence [price_SP98, price_E10]. When omitted, the prices are\n",
"            fetched with find_price() on first use and reused on later calls.\n",
"\n",
"    Returns:\n",
"        pd.Series with 'con_dis' (con / dis) and 'price_dis' (price of gt * con / dis).\n",
"\n",
"    Raises:\n",
"        KeyError: if gt is neither 'SP98' nor 'E10'.\n",
"    \"\"\"\n",
"    if prices is None:\n",
"        # A find_price() default in the signature would run the HTTP request as soon as\n",
"        # the function is defined. Fetch lazily instead, and cache the result so that a\n",
"        # row-wise apply does not trigger one request per row.\n",
"        if not hasattr(df_trans, '_default_prices'):\n",
"            df_trans._default_prices = find_price()\n",
"        prices = df_trans._default_prices\n",
"\n",
"    cpd = con / dis  # consumption per unit of distance\n",
"\n",
"    price = {'SP98': prices[0], 'E10': prices[1]}\n",
"\n",
"    ppd = price[gt] * cpd  # price per unit of distance\n",
"\n",
"    return pd.Series({'con_dis': cpd, 'price_dis': ppd})"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# fp holds the pair returned by find_price(): (price_SP98, price_E10).\n",
"fp = find_price()\n",
"# Every row that is not 'SP98' gets the E10 price.\n",
"df['gas_price'] = df['gas_type'].apply(lambda gas: fp[1] if gas != 'SP98' else fp[0])"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"# Pass the prices already fetched into fp so that price_dis is computed from the same\n",
"# values as the gas_price column, instead of relying on df_trans's own default prices.\n",
"df[['con_dis', 'price_dis']] = df.apply(\n",
"    lambda row: df_trans(row.distance, row.consume, row.gas_type, prices=fp),\n",
"    axis=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"# The DataFrame is now transformed; save it so that it can be used from another notebook.\n",
"# NOTE(review): this writes to the same path the notebook loaded from and drops the index.\n",
"df.to_csv(path_or_buf='../Data/cleaned_mes.csv', index=False)"
]
}
],
"metadata": {
"interpreter": {
"hash": "4034f9195f5552b4454ef60198efa491d941068725cfe9b8182a5b0158f58c43"
},
"kernelspec": {
"display_name": "Python 3.8.12 ('ironhack')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading