diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..c368a9f --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,1259 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cd79f124", + "metadata": {}, + "source": [ + "# ¿Cuál es el mejor combustible?" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8aceb019", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6ff8121b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "df = pd.read_csv('measurements.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "747246d5", + "metadata": {}, + "outputs": [], + "source": [ + "with pd.ExcelFile('measurements2.xlsx') as xls:\n", + "    df2 = pd.read_excel(xls)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "360f0de2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "distance object\n", + "consume object\n", + "speed int64\n", + "temp_inside object\n", + "temp_outside int64\n", + "specials object\n", + "gas_type object\n", + "AC int64\n", + "rain int64\n", + "sun int64\n", + "dtype: object" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "7d906165", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(388, 9)" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "0d964ccf", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "distance 0\n", + "consume 0\n", + "speed 0\n", + "temp_inside 12\n", + "temp_outside 0\n", + "specials 295\n", + "gas_type 0\n", + "AC 0\n", + "rain 0\n", + "sun 0\n", + "refill liters 375\n", + "refill gas 
375\n", + "dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "id": "30d265db", + "metadata": {}, + "source": [ + "### Debido a que el número de nulos de las columnas refill liters y refill gas es 375/388, la información no es útil y elimino las columnas para reducir el número de datos con el que trabajamos" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "8a7204d9", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop(columns=[\"refill liters\", \"refill gas\"], errors=\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "fa59bd3a", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsidespecialsgas_typeACrainsun
02852621,512NaNE10000
1124,23021,513NaNE10000
211,25,53821,515NaNE10000
312,93,93621,514NaNE10000
418,54,54621,515NaNE10000
.................................
383163,73924,518NaNSP98000
38416,14,3382531ACSP98100
385163,8452519NaNSP98000
38615,44,6422531ACSP98100
38714,75252530ACSP98100
\n", + "

388 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside specials gas_type AC \\\n", + "0 28 5 26 21,5 12 NaN E10 0 \n", + "1 12 4,2 30 21,5 13 NaN E10 0 \n", + "2 11,2 5,5 38 21,5 15 NaN E10 0 \n", + "3 12,9 3,9 36 21,5 14 NaN E10 0 \n", + "4 18,5 4,5 46 21,5 15 NaN E10 0 \n", + ".. ... ... ... ... ... ... ... .. \n", + "383 16 3,7 39 24,5 18 NaN SP98 0 \n", + "384 16,1 4,3 38 25 31 AC SP98 1 \n", + "385 16 3,8 45 25 19 NaN SP98 0 \n", + "386 15,4 4,6 42 25 31 AC SP98 1 \n", + "387 14,7 5 25 25 30 AC SP98 1 \n", + "\n", + " rain sun \n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "383 0 0 \n", + "384 0 0 \n", + "385 0 0 \n", + "386 0 0 \n", + "387 0 0 \n", + "\n", + "[388 rows x 10 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "d5f70d2f", + "metadata": {}, + "source": [ + "¿Afecta la distancia al consumo? ¿A más kms más gasto % de gasolina?\n", + "¿Afecta la temperatura al consumo? ¿A más temperatura más gasto % de gasolina?\n", + "¿Afecta la velocidad al consumo? ¿A más velocidad más gasto % de gasolina?\n", + "¿Cómo afectan el sol y la lluvia a cada combustible?\n", + "¿Cómo afecta la temperatura a cada combustible?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "4175963d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['E10', 'SP98']" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['gas_type'].unique().tolist()" + ] + }, + { + "cell_type": "markdown", + "id": "b35b52f1", + "metadata": {}, + "source": [ + "### Voy a analizar la columna specials, debido a que tiene un número elevado de nulos y veo que la información que contiene puede estar en otras columnas " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "aaec3e81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "rain 32\n", + "sun 27\n", + "AC rain 9\n", + "ac 8\n", + "AC 6\n", + "snow 3\n", + "sun ac 3\n", + "AC snow 1\n", + "half rain half sun 1\n", + "AC sun 1\n", + "AC Sun 1\n", + "ac rain 1\n", + "Name: specials, dtype: int64" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"specials\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "3e663272", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 358\n", + "1 30\n", + "Name: AC, dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"AC\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "d856b0d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(32, 10)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Identificando que los campos con rain en specials están identificados en la columna rain.\n", + "Todos están reflejados en la columna rain.\n", + "\"\"\"\n", + "dfrain = df[(df.specials==\"rain\")&(df.rain ==1)]\n", + "dfrain2 = df[(df.specials==\"AC rain\")&(df.rain ==1)]\n",
+ "dfrain3 = df[(df.specials==\"ac rain\")&(df.rain ==1)]\n", + "dfrain.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "c70baf46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(27, 10)" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Identificando que los campos con sun en specials están identificados en la columna sun.\n", + "La mayoría están reflejados en la columna sun.\n", + "\"\"\"\n", + "dfsun = df[(df.specials==\"sun\")&(df.sun ==1)]\n", + "dfsun2 = df[(df.specials==\"sun ac\")&(df.sun ==1)]\n", + "dfsun3 = df[(df.specials==\"AC sun\")&(df.sun ==1)]\n", + "dfsun4 = df[(df.specials==\"AC Sun\")&(df.sun ==1)]\n", + "dfsun.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "d5cdf9ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9, 10)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Identificando que los campos con AC en specials están identificados en la columna AC.\n", + "Todos están reflejados en la columna AC.\n", + "\"\"\"\n", + "dfac = df[(df.specials==\"AC rain\")&(df.AC ==1)]\n", + "dfac2 = df[(df.specials==\"sun ac\")&(df.AC ==1)]\n", + "dfac3 = df[(df.specials==\"AC sun\")&(df.AC ==1)]\n", + "dfac4 = df[(df.specials==\"AC Sun\")&(df.AC ==1)]\n", + "dfac5 = df[(df.specials==\"AC snow\")&(df.AC ==1)]\n", + "dfac.shape" + ] + }, + { + "cell_type": "markdown", + "id": "fca03589", + "metadata": {}, + "source": [ + "### Después de explorar los datos de la columna specials, podemos descartarla, debido a que la información que nos da está recogida en las columnas AC,sun & rain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "388ae869", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop(columns=[\"specials\"], errors=\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "21d55a03", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsidegas_typeACrainsun
02852621,512E10000
1124,23021,513E10000
211,25,53821,515E10000
312,93,93621,514E10000
418,54,54621,515E10000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside gas_type AC rain sun\n", + "0 28 5 26 21,5 12 E10 0 0 0\n", + "1 12 4,2 30 21,5 13 E10 0 0 0\n", + "2 11,2 5,5 38 21,5 15 E10 0 0 0\n", + "3 12,9 3,9 36 21,5 14 E10 0 0 0\n", + "4 18,5 4,5 46 21,5 15 E10 0 0 0" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "da84d413", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "distance 0\n", + "consume 0\n", + "speed 0\n", + "temp_inside 12\n", + "temp_outside 0\n", + "gas_type 0\n", + "AC 0\n", + "rain 0\n", + "sun 0\n", + "dtype: int64" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "64e62883", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "distance object\n", + "consume object\n", + "speed int64\n", + "temp_inside object\n", + "temp_outside int64\n", + "gas_type object\n", + "AC int64\n", + "rain int64\n", + "sun int64\n", + "dtype: object" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "33da2b7d", + "metadata": {}, + "outputs": [], + "source": [ + "df['distance'] = df['distance'].str.replace(',', '.', regex=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "299f6c4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceconsumespeedtemp_insidetemp_outsidegas_typeACrainsun
02852621,512E10000
1124,23021,513E10000
211,25,53821,515E10000
312,93,93621,514E10000
418,54,54621,515E10000
\n", + "
" + ], + "text/plain": [ + " distance consume speed temp_inside temp_outside gas_type AC rain sun\n", + "0 28 5 26 21,5 12 E10 0 0 0\n", + "1 12 4,2 30 21,5 13 E10 0 0 0\n", + "2 11,2 5,5 38 21,5 15 E10 0 0 0\n", + "3 12,9 3,9 36 21,5 14 E10 0 0 0\n", + "4 18,5 4,5 46 21,5 15 E10 0 0 0" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "6b4fa21b", + "metadata": {}, + "outputs": [], + "source": [ + "df4 = pd.to_numeric(df[\"distance\"].str.replace(',', '.', regex=False), errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "0905b2fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 28.0\n", + "1 12.0\n", + "2 NaN\n", + "3 NaN\n", + "4 NaN\n", + " ... \n", + "383 16.0\n", + "384 NaN\n", + "385 16.0\n", + "386 NaN\n", + "387 NaN\n", + "Name: distance, Length: 388, dtype: float64" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "bc5c1241", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "distance object\n", + "consume object\n", + "speed int64\n", + "temp_inside object\n", + "temp_outside int64\n", + "gas_type object\n", + "AC int64\n", + "rain int64\n", + "sun int64\n", + "dtype: object" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4198bd8f", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: unfinished row loop (was: for i,row in df.iterrows():) - kept as a comment so the cell parses on Run All" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "8f2b8ed0", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "could not convert string to float: '11,2'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf2\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'distance'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'consume'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'temp_inside'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5813\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5814\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5815\u001b[0;31m \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5816\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5817\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 418\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 419\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m def convert(\n", + 
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 327\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 328\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignore_failures\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 591\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mastype_array_safe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 592\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmaybe_coerce_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1309\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1310\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mValueError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0;31m# e.g. 
astype_nansafe can fail on object-dtype of strings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1257\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;31m# in pandas we don't store numpy str dtypes, so convert to object\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0mflat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1095\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1096\u001b[0m \u001b[0;31m# error: Item \"ExtensionArray\" of \"Union[ExtensionArray, ndarray]\" has no\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[0;31m# attribute \"reshape\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1200\u001b[0m \u001b[0;31m# Explicit copy, or required since NumPy can't view from / to object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1202\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1203\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '11,2'" + ] + } + ], + "source": [ + "df2 = df[['distance', 'consume', 'temp_inside']].apply(lambda col: col.str.replace(',', '.', regex=False)).astype(float)" + ] + }, + { + "cell_type": "markdown", + "id": "cfe6cd55", + "metadata": {}, + "source": [ + "### Voy a revisar la correlación entre columnas para revisar si encontramos algún dato interesante" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "28b703bc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speedtemp_outsideACrainsun
speed1.0000000.015411-0.0354080.0094890.081618
temp_outside0.0154111.0000000.167562-0.1863150.346903
AC-0.0354080.1675621.0000000.2429150.088598
rain0.009489-0.1863150.2429151.000000-0.112650
sun0.0816180.3469030.088598-0.1126501.000000
\n", + "
" + ], + "text/plain": [ + " speed temp_outside AC rain sun\n", + "speed 1.000000 0.015411 -0.035408 0.009489 0.081618\n", + "temp_outside 0.015411 1.000000 0.167562 -0.186315 0.346903\n", + "AC -0.035408 0.167562 1.000000 0.242915 0.088598\n", + "rain 0.009489 -0.186315 0.242915 1.000000 -0.112650\n", + "sun 0.081618 0.346903 0.088598 -0.112650 1.000000" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select_dtypes(\"number\").corr()" + ] + }, + { + "cell_type": "markdown", + "id": "520aa3d3", + "metadata": {}, + "source": [ + "### No encontramos datos definitivos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b1ad5a5", + "metadata": {}, + "outputs": [], + "source": [ + "# Does distance affect consumption? More km, higher % fuel use?\n", + "# Does temperature affect consumption? Higher temperature, higher % fuel use?\n", + "# Does speed affect consumption? Higher speed, higher % fuel use?\n", + "# How do sun and rain affect each fuel?\n", + "# How does temperature affect each fuel?" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "5195c7e5", + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_context(\"poster\")\n", + "sns.set(rc={\"figure.figsize\": (12.,6.)})\n", + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "9cc5c488", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=df, x=\"gas_type\")" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "3aae6352", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=df, x=\"temp_inside\")" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "89212494", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=df, x=\"temp_outside\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}