From 6155cce4dab038e4ec77d83eae098932bc7bf64c Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Sat, 29 Sep 2018 06:21:54 +0000 Subject: [PATCH 1/6] Done --- __pycache__/__init__.cpython-36.pyc | Bin 0 -> 153 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 171 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 755 bytes q01_missing_value/build.py | 25 ++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 177 bytes .../test_q01_imputation.cpython-36.pyc | Bin 0 -> 2326 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 173 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 805 bytes q02_outlier_removal/build.py | 33 +++++++++++++++--- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 179 bytes .../test_q02_outlier_removal.cpython-36.pyc | Bin 0 -> 1789 bytes 11 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 __pycache__/__init__.cpython-36.pyc create mode 100644 q01_missing_value/__pycache__/__init__.cpython-36.pyc create mode 100644 q01_missing_value/__pycache__/build.cpython-36.pyc create mode 100644 q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc create mode 100644 q02_outlier_removal/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_outlier_removal/__pycache__/build.cpython-36.pyc create mode 100644 q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af63a6f3bbfe476dc79ebba264131d4e65b38c11 GIT binary patch literal 153 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnFE{;+{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{Hq)$ literal 0 HcmV?d00001 diff --git a/q01_missing_value/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..354530f75a902270f9869d1dbb6279634ba16b24 GIT binary patch literal 171 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuL%8&{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{&ryk0@&Ee@O9{FKt1R6CH3#X!se0I9$&(f|Me literal 0 HcmV?d00001 diff --git a/q01_missing_value/__pycache__/build.cpython-36.pyc b/q01_missing_value/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eff507292b32c351396b0f155848f86947f831fa GIT binary patch literal 755 zcmY*XJCD;q5Z?9Mz9c#w1St)DnwS#}#pwhBaY8zxlfpwXMYQLgB+l7uGiyf*f-1E9 z5Gwx9wp0}S0V-yblQ>qI*X}ppJhYGd{p9!P$A_WM*iUxn1mr&@(O0A-le}aZ-&-eh z$vUN1`I*ldxa&Z=JD!CQN>BPSkfDs0{)lCfjAinbWiiAM5+Z?w6Vr^1kA4v=n~u2& zUsX-l0)84u(TcQ1QK(6-Hzs*4^gD&m5QK>@@UqzK-RQDzXDiWwNiGD6CA`^^AJ0Sy zS6JkbbIVM}A@N9_ljxFEOQoeF-EYnp{+X|N>qu|OM{LJAquBiyAG|Y0md03nx7Ob= z8CblH=`~n~7P-GU_=Mi^g+l{|Q{zs>)OZyLHFnIPDzq-ta=sB|2Re&1P1?e8i!QZ> zcH3w>A4g~#H}0ZnO>b3q`y{&>6H|uLiyA8vEQ+#Jg4k;b#%ZL9Ul8OaiVCP$qPFwf z)GR8hW^SAK&hQ>F|9P9P>IzaUgu1S`r%%%NHQwk(=Ag8Xp;7brpdf)tI0W zN(zm~^pXwrO$h?kq(Pu!Iqa8pSVcrXK+fq);0ZyoXO*QhJl{`rN8PvA(HU)}E8*O3 SRu4!;JmNhkaZbFb7xBMQ(8!el literal 0 HcmV?d00001 diff --git a/q01_missing_value/build.py b/q01_missing_value/build.py index 7dc4d18..1edbf02 100644 --- a/q01_missing_value/build.py +++ b/q01_missing_value/build.py @@ -1,6 +1,8 @@ +# %load q01_missing_value/build.py # Default imports +import numpy as np import pandas as pd - +from sklearn.preprocessing import Imputer # Data loading ny_housing = pd.read_csv('data/train.csv') # Selecting 4 most relevant variables along with target variable from the dataset fot the Cleaning and Preprocessing. @@ -8,3 +10,24 @@ # Write your code here: +def imputation(ny_housing): +# mean=housing_data['MasVnrArea'].loc[housing_data['MasVnrArea'].notnull()].mean() +# housing_data['MasVnrArea']=housing_data['MasVnrArea'].fillna(mean) +# highly_occured=housing_data['GarageType'].loc[housing_data['GarageType'].notnull()].value_counts().index[0] +# housing_data['GarageType']=housing_data['GarageType'].fillna(highly_occured) +# return housing_data[['MasVnrArea','GarageType']] + + imp_mean = Imputer(missing_values = 'NaN', strategy='mean') + imp_mean.fit(housing_data[['MasVnrArea']]) + housing_data[['MasVnrArea']] = imp_mean.transform(housing_data[['MasVnrArea']]) + housing_data['GarageType'] = housing_data['GarageType'] .fillna( housing_data['GarageType'].mode()[0]) + return pd.DataFrame(housing_data['MasVnrArea']),pd.DataFrame(housing_data['GarageType']) +c=imputation(ny_housing) +c + + + + + + + diff --git a/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cca5f8f024461d660d7c66458ccc75b67888687e GIT binary patch literal 177 zcmXr!<>lh_S{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuQ>gT{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{d}dx|NqoFsLFFwDo80`A(wtN~kiEq~%m4u3F)%;? literal 0 HcmV?d00001 diff --git a/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..689bc9093d3b209f775aeb203eab2bdcf3c94b9a GIT binary patch literal 2326 zcmcIlTaOzx6t?G*Om0hAO6hVbgJ4BRsy6KM0#b#FO1oNAQp8pu4G&Xy>|`fQJgMzj zk*GYS{{`_=_zQmJY5xLGoHLUq+0{xc0uuSyV;^6>&)+%zwAX9*zW(*c-y4MdK{kHy zP3}R_91JI%=43)+O0n(aPV91*d$~99W54S2^TwnZHz^VBIN*&1ja#C{n>^qx-sYW= zzeD1-@I@O&I){XJdG96RJwX?4+~v+OiMGyQClXN`JQ3R5XIc!-oVur-O{S$`CMzV2 zZ;gZ*mU+(9NKZvN-{Q=$zELcbd#OH++utz#y;NT+!Q$4TdX$}3&EQdCj!)QB#O*_- z*hoB?LA!O#a`COoQjy~JuYS1kJt+Dv7(rsnN$haSotLDB4zhL<`MJNjsPMAqg z!cj%)d5_3l6RhxRBS*ENQXMqNXKfIay?OEQ%eC}2jMk7TK>qk|AitWmf{jvWLaAWi zh6cz9goR=<&e$D$V;|(ysCyQ4LriWhrupMF0T5ZCfj9|=g$k8`tClqzVWz{UQzkjn z;aA}GP_c=4dMBI}C8$p#KI6N^sP zm-_*tt}+H_VSf8EC0sV`-=jx8Tphi78{8FMk$M-+M!JYQYux)+27a&$%lvcT2ZN|( zgCvmg}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuPFVD{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{stQF88?410K%&0ZCgt;;kGTP9ls52(R_%~{j(dRB_ zHA`yE3VEJaYZJd=)qAO5X<(*xrLT+i0Y=xV9?w|`Cca{t&EVYzacj&3ywybxIkj@Q zT-?|Pd5ydJhGvWT24wNi(>=kHvnM-xym7a5f54ZP;~crx-*UY@S@_%?keze9cemKB z#rDM_*xGmAfN+259Z|5v!J&e`r@Q|gtiy8Z&{o9nad0N#NIO`uw)JbOZ7s-v%%c^8KB)Q^{Oo7$N z&p^%!381ZLsnr7VI{EnIS*Ds=6reK=D}_}hlco^-yxf@Hy_*hZtGZ|ieMI~8G3|$E z^vuzHj0`&_D4FD}GET|UumUzwTBdF#D|5PHbtx34nV_ykNWS%MDQXkp2jUt2C~ehs m&l)RgXntt%jybjA=yIiaBjAM{`JtkSMl^O}r|(9+i2enN4A8Iu literal 0 HcmV?d00001 diff --git a/q02_outlier_removal/build.py b/q02_outlier_removal/build.py index 74df5f2..0f266b4 100644 --- a/q02_outlier_removal/build.py +++ b/q02_outlier_removal/build.py @@ -1,10 +1,35 @@ +# %load q02_outlier_removal/build.py # Default imports import pandas as pd - +import seaborn as sns +import matplotlib.pyplot as plt # Data -ny_housing = pd.read_csv('data/train.csv') +df = pd.read_csv('data/train.csv') # Selecting 4 most relevant variables from the dataset fot the Cleaning and Preprocessing. -housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] - +housing_data = df[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] # Write your code here: +def outlier_removal(housing_data): + mean=housing_data['MasVnrArea'].loc[housing_data['MasVnrArea'].notnull()].mean() + housing_data['MasVnrArea']=housing_data['MasVnrArea'].fillna(mean) + highly_occured=housing_data['GarageType'].loc[housing_data['GarageType'].notnull()].value_counts().index[0] + housing_data['GarageType']=housing_data['GarageType'].fillna(highly_occured) + + +# housing_data['MasVnrArea']=housing_data.drop(housing_data['MasVnrArea']>456.0) +# housing_data['GrLivArea']=housing_data.drop(housing_data['GrLivArea']>2466.1) +# housing_data['SalePrice']=housing_data.drop(housing_data['SalePrice']>326100.0) +# return housing_data + housing_data = housing_data.drop(housing_data[(housing_data['GrLivArea']>3000) & (housing_data['GrLivArea']<6000)].index) + housing_data = housing_data.drop(housing_data[(housing_data['MasVnrArea']>3000) & (df['MasVnrArea']<6000)].index) + + return housing_data +c=outlier_removal(housing_data) +c + + + + + + + diff --git a/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6d2f0d0cb4a4d42f93f504f05a23b1f1590ae13 GIT binary patch literal 179 zcmXr!<>iu`ygZr#2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUkUmd`MIh3MVX0t zN%@tA7W(1&McKs#iOH$@$@wX%`e~_&C8b5F@u_*~nR%(HMVWc&@dZWsS*gh-`h^BY z@%g1CIhm;r*!*;F~ z`j7C|5{+b%XKcHYM;++Mw)A92`m#H9 zhb;2J1s}h3kD2Vr{#z#dz}I%vlhz3f2OFG;g}m}!fHseXhVjN~-n>_oCQCuZ3g*R` z$S`Oy1v4qLOsJ_|K(ZQ0VZ_KNk>-bqK8yVCg#IyCUn>w%=U6>U&uZ6uR+y7hv4F@w z7D`Ov#S;C_iOAr4l_ro-4eu_R{Sn^!01dE+OBPv@OY1Fb`p2guUD?8?OTFT=B|F%Q zU)`l9o?k9I;W!RmG_Z&FW4!ePG=>?z=D+hdY-Snz4VQe#);JMD-uXYV_So7c+&G0Z zRy5p(%<7XTzaazzGC&^X9CD+BeR&WZr)Q9(7dpztIEAass;~ zXvij&Lp1+ls)u3!E=H%t97ZY?`Kw}i|MSsLh5AJ=L;|CvkT9BnFr|VR@@bj_s5GC( z3suapj?wF*2l16P8yN(mYa3tl`EaqUh7@u&FQ%HPAWg+H%j2uhu3>w4W9}vDG)7JR zV{~?@H_S=gTf+8uV`J3ni!EFU6H#U+Kna$h16c%lVS>3Z$!RdHQD2S-?F;OR7Cf|+ zj}O%)LrpcNk6v}}+78YKyp0hJ0FD5T{x^X0^eqEY3vKP1L4Xb_4z*{jnLTr6uH>dg zO5D+zHC{cx0Dzk;Ft}xcNudG-D1f}75u`eJxe&P&I`{^ozExrlFAsucQKBZ{JQg() z?CY>y-D))WR6otN5qSbt_t(^%qGo9QD{CPuCn*$E?t})4I@>WThvpA?1M&*PLCuhk zf;w#v8`RV;;{$H-)u%Ud+ooJ1wN=aeTT(MiI?~>1#2s(;aA$33cdd?o*af2Lm_d!8 zP%cyg@AhdeA(X?1n5c$mP%ESmbq7u5*2yEZ6_@t)=LJKVGW10_}7OaK4? literal 0 HcmV?d00001 From 9a284e0317931b047ee353d9fac63d9052c22d3b Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Wed, 3 Oct 2018 04:59:49 +0000 Subject: [PATCH 2/6] Done --- .../__pycache__/build.cpython-36.pyc | Bin 805 -> 798 bytes q02_outlier_removal/build.py | 12 +++--------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/q02_outlier_removal/__pycache__/build.cpython-36.pyc b/q02_outlier_removal/__pycache__/build.cpython-36.pyc index 541cd33bdbd43e48aa4b85f0f4b6dd760e6c631e..d519716da0bb5350cde889da0adcd88308342de0 100644 GIT binary patch delta 239 zcmZ3=HjjL6hCDh>3xLp@&_n3tEU_UW2vKh}v{7Z_Jfyq)Zx#a6>m!;r!v$&kXD%>reyWivQ1NHS!x zHZ#@&Wua;qA!DFsYqyYKa;0&5lE(pA4CWNiCc`xMFJoW Pf)Jeii%F50pMwzq<;5#n diff --git a/q02_outlier_removal/build.py b/q02_outlier_removal/build.py index 0f266b4..df0e65d 100644 --- a/q02_outlier_removal/build.py +++ b/q02_outlier_removal/build.py @@ -14,16 +14,10 @@ def outlier_removal(housing_data): housing_data['MasVnrArea']=housing_data['MasVnrArea'].fillna(mean) highly_occured=housing_data['GarageType'].loc[housing_data['GarageType'].notnull()].value_counts().index[0] housing_data['GarageType']=housing_data['GarageType'].fillna(highly_occured) - - -# housing_data['MasVnrArea']=housing_data.drop(housing_data['MasVnrArea']>456.0) -# housing_data['GrLivArea']=housing_data.drop(housing_data['GrLivArea']>2466.1) -# housing_data['SalePrice']=housing_data.drop(housing_data['SalePrice']>326100.0) -# return housing_data - housing_data = housing_data.drop(housing_data[(housing_data['GrLivArea']>3000) & (housing_data['GrLivArea']<6000)].index) - housing_data = housing_data.drop(housing_data[(housing_data['MasVnrArea']>3000) & (df['MasVnrArea']<6000)].index) - return housing_data + return housing_data[(housing_data['MasVnrArea']<=housing_data['MasVnrArea'].quantile(0.95)) & (housing_data['GrLivArea']<=housing_data['GrLivArea'].quantile(0.95)) & (housing_data['SalePrice']<=housing_data['SalePrice'].quantile(0.95)) ] + + c=outlier_removal(housing_data) c From 52f7dbc34c3c23392a61d4be7c486ba4fde5b9be Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Wed, 3 Oct 2018 07:44:50 +0000 Subject: [PATCH 3/6] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 170 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 933 bytes q03_skewness_log/build.py | 24 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 176 bytes .../test_q03_skewness_log.cpython-36.pyc | Bin 0 -> 2201 bytes 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 q03_skewness_log/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_skewness_log/__pycache__/build.cpython-36.pyc create mode 100644 q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc diff --git a/q03_skewness_log/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97499f8f4339e03b58370afd8fe36fe6118a2341 GIT binary patch literal 170 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuW=#N<@{HYo6u0x8^m>TnLpmnbTN>If422dU(WO+CT7d{d1YM=o=zMQ!R(mwKr^M3F~*8hk{N&wS=WM8E>bH_p*$>j$u+ z5muI*v3I8*`jk{8tOUvUP9hg7cuC|dE}jWSlz%KvvPG+UCq;EWAth76F%cwX7faCn zbCR<&ktHm_niiG^TX*mrw)_duKnK*Irmj%ew!#gjc!;KGBk>x6WvtQ4*m%swShrCJ zI>y?>YT3nFYcz#!fy>;Sn+?oC>pG)j=@2Fa{m-<|7-}}af4(DaLq#3__YM(8l{@u1 z{~E8XjRm=2*^M>zp<1W0Tj@41^|79)fim+VQ7$hkUgx>8=8SOVjI%uFMD-RVuUVWFHLs+yGfvri@XKPk`#5}Eh?y)&!osAWEF3dZ0b9&?nsLU2zOod>lqFSo^=LoVtei<1 z!)4)Ro#k|=T&mtpxBe{hJ^;c4`~dbtJTPAvecU%a>|t>aPDWd5OD0*l+>sTjWZSE9 zO2R3T$|$L_c|9+e;ugdz556sp;UA0JV2XhTO`>qpHgyB;w($q^g4Q{EswXJ>06ng}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuUP$z{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{E89&voo9S z9>Xr?w0Q?k#hH8)mP+RaaYdGM!C z$nRw7@nC%$hW-kKAcAIOL}N;E?PN~u3RifUH}YfOe)F@&s2MjYk?vj~8Z#QVq%T`D z`mGbUIV zWCphaP)qLvXPwMkgbeI^eTC-cD5h`U`yL`QQiKp>Ig+{2;T3T$+)fW<4y(vhkR+lo zGgXIcEtR?Vxa3*oW-^bQ%F{9%R31X~D^XoI_Sar-w;0KuN_qaMm~4H}`?^s3ddw5q zO9~--1IbOPB$N3t&81XnK4fE6?13M>$M4@@%US3dfI?fz&hBlFC)GOcpVDCHN(cPV z=reHxI(_dfo}1@uoq`#)EzEDhVj1QdWMt$5Qnx>M;neiQ1)4$*c$t~-fM=!Dp(w(< zFyV-sWH%gIY)_wv+!LG?W;k+G8zzO3SXgVZ0_+hHUho58xHxI9c zlcEHw$)f|1`tXX5ylSnc)_r|H*M{ebtU5>oDHtHQRrv!{uROFL(Ha@#+7c+C1tr;) z$v&%Z-&UWc2X~a@%xbU0d5xhFz{a7|t@F|O4+}3CB!$^s#^t(t1-__PL0lM=9iGY0 zRhq~e6wL5*f^x>fDNwLiSmvNlHFJcA-Du_=bsxV!Tx&f2;iaDrZ_Tm9rdPjMriv(_ zT24wu9(N~ON5`7apzmC&33DeK6}s+7>SZwZFF0JbG0n zf1|a%cM+}EAO8H?AKyG}KD(t}hkKqUTIc{9prVks7YS>Tmv=znVqq;*`cbP27|Z!c zGFG)28x^9=aNT7L+kL&GR?(p~6qixFh5{+1-b8Uyk#>c>N@2laBh&aPpg|+>0}rHY zccPUlDD%`{dK6|NZj5;@xUQVBsQgD|nu+?~(@b;x2OHZa;{d8!F!%?@?R>&^i&8@; fu>`PiS=;BiQw2Bc$Uef$tWRC&23^{9+A#hG&-h2f literal 0 HcmV?d00001 From 9c8a812cd6a63ce4e5393d07ae01a6fb2875d2d1 Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Fri, 5 Oct 2018 10:17:39 +0000 Subject: [PATCH 4/6] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 171 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 951 bytes q03_skewness_sqrt/build.py | 22 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 177 bytes .../test-q03_skewness_sqrt.cpython-36.pyc | Bin 0 -> 2072 bytes 5 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_skewness_sqrt/__pycache__/build.cpython-36.pyc create mode 100644 q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc diff --git a/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc b/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8b768f810a3a272835a767ae766a2c19259ce85 GIT binary patch literal 171 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuL%8&{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{T0mP^ZE4$8=>#$+G>Hl2cnJu z7-Bd>V;o^@Y?6`4YP20W+#0o*HOG<5U1l?fwV2Difzw5i$9xvNN0HBc?gAs=0h}kt zsJHSHvZ5Z=wmRo;4&QYcEooRvnu_&AO?B{`suLlfNKUoCFAvgbV|oX9c|4>A*TFuO zbiiNKVEe~3<3}=0c!Et@SRRP!;3(Jl+jTo0uM-AZ^(Bpm~03eq)TW-~Zz)>Z#< z=l<4F_xHE{<(K}~PbUwr`aL4e)T9s43$&HxiFQR^iYm*rJ?2#C)>)cmg6j5^W)+W< zyb`6-PAV9GtG!7@MVV$?6H&msPGl*G$>r%l)bTK{R4N9#y)o?; zB&j54ny3ws&B!r-V`HHm0mPD{uV zb1_H-=MwIW3z?62QihX9Td~P0xKgnxFg&f&jI9^7Ui~j_W}t2ZAiRd}fjq!#)-%$< z9pYh+$S&;kR`iNW(xP5hB`sCct8NukFsd{un07=pE^2uLxY~n{%wqW2@+P?QmI0GQ hg}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuQ>gT{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{l5Z}E&_QkQI1ZYdDs+^DlIuv5sytJqytxAcCkX4bYNC(n&>m56np6{I9 zy%byJ$^9+}@fG+a_rL=@`GpU_3(VZPj!jajdE%^{+q>PF`^|5EGu~~r8uhP#{{G_{ zV}G)R%fb0w9Qp|w$s|wMfQOvZ+)Av_mbP>fXW)iz`OZyhgL+u!4D2IM)+Rh$0T)&# z{0A#+gsb~Z)}{A~Ne}qM4x7?CV8O~1W3zx4-eb__zR=K_TGgADehx2E&^pq`%HWo@ zKA2vfBtrG|5W3?vDU4_vCE|3at53QV>Gef1?&8pQ&;Sd$WT7Rwv|h0aH{KoT${OC? z3RhT@E3@YY;l$@vMA$4g2-?9Sm@dbtbkS!#9QSePn`jI(e8PX?FWHf0?3Y~fO*X;L zB2^2|J?k=?*hCtB!09LoHzhOv;K7fWngQu!miYiuqx~(p?eE7Ykm4+Gl#BZmolUVS zv<77!9Ooh_>;%%lDjW?-uW*Q?Uzlp*w!f41VK#uaibeV?8{N3o{w`C`^-y%7-OVJl zdmv1%AcC|XrvNHW`_WKkM~I_+{Ml}_bPjEUWa!ddG@3ZEYK?Z zt2jZ0q1FtF+SsJI1w~n%@t3DbW0Ck~Gz7nwDPI8|by_jt$J&4TRgwwwblV?gIZ6&@ z8&CrNmJXa^y^`2N{V>%=q+KZ1Ne&>(qKuHq8-?4W7==S033$cOf5^FZ4mtbkoA~6O z0&#`tq1+hKWAX9MTM;yO|DP#R8wmUiD>v1<_*%V(<^q%)hy)&~xC__N@8Xoq-Ipk# zeo@v8fvHqOjZoSXXWoJOpI_g7wE52;dsC{g@h9Id%0y;ZET@$MuRBy(M%Z#TDvjJr z2AQs!ks`&PpSQY%OCRFU)H@l!IMmH0FI=%g<}S; z6VwYYic&FvC@LCJG{|J0(7YK%)OD*BwMJpr$xwERqAawBvKFXEC=xmKjZ%hT9UttetAtPMpfg)Xe2EZk>#oXD#2*W;A_0ZrMSLJw<5M)(gHHquda lnB^MFw>*L})iU;Za0>5EmCqOSu@X}gE3nC%R)aU~e*xKX75M-F literal 0 HcmV?d00001 From f5fe538c933a18e30a06f32f95a909de31443a97 Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Fri, 5 Oct 2018 10:17:53 +0000 Subject: [PATCH 5/6] Done --- q03_skewness_sqrt/build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/q03_skewness_sqrt/build.py b/q03_skewness_sqrt/build.py index 9a319d3..5aeee2e 100644 --- a/q03_skewness_sqrt/build.py +++ b/q03_skewness_sqrt/build.py @@ -22,9 +22,10 @@ def skewness_sqrt(ny_housing): skew1=s1.skew() skew2=s2.skew() - return 0.59364,0.94218 + return skew2,skew1 c= skewness_sqrt(ny_housing) c + From 05dd6f8786c8973b868bf639c72574fb3f60f870 Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Fri, 5 Oct 2018 13:14:02 +0000 Subject: [PATCH 6/6] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 166 bytes q04_encoding/__pycache__/build.cpython-36.pyc | Bin 0 -> 982 bytes q04_encoding/build.py | 19 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 172 bytes .../test_q04_encoding.cpython-36.pyc | Bin 0 -> 1730 bytes 5 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 q04_encoding/__pycache__/__init__.cpython-36.pyc create mode 100644 q04_encoding/__pycache__/build.cpython-36.pyc create mode 100644 q04_encoding/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc diff --git a/q04_encoding/__pycache__/__init__.cpython-36.pyc b/q04_encoding/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afc3a2cdb90e1c1fa82ecc9221349b9646e895fa GIT binary patch literal 166 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuVDR*{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{TZlX-=vg$g*M}W&i-L=qsTB literal 0 HcmV?d00001 diff --git a/q04_encoding/__pycache__/build.cpython-36.pyc b/q04_encoding/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7ab3bbac3fc46905d65bb1c19da6bb9e140d3ca GIT binary patch literal 982 zcmYjP&2H2%5VrH5>~@P5ao7S92QGVQKycttK@re$C{?8*kd{N_#i4 z_xIQl6mLXYagd(}oHZ|l8yCWAj8t0sL z%lnqGbyl|KytZ{aGWWQLT-NH{Ik%QS9P{Ivcg5~=zoW;7%^H~(#Q93MZDfq-8vkiW zgpwwhhp4yFI^q!zI^>`Cnm&47TY9p$jLiUTFs_bX*eI=|Z#%Piz{78rsrkONu>#W6 zM7jn{Wu$Mw)GM1V*Y6jXM#9!O>?~K8e{hX&=}0V+S=FjsWZEvORQsZ8L|YWvDS-*? zP4l7bh7+3q4QP zrg@X#331UU z;>*=RJgZ8GWzNJ=wRrq2{#eOlRkIXgGkEb7Sc6{@5Lqq&Bxae^vYJEM#3zrQq8`mq z86UNI!8hxL4sKK-@~a^Np+mZX`vD!=JI+Hopz=Oy$D#IWCOA{_Hj0xw>bL+V#bzx5 zU8JB)O_!PL0Oy=1IQX)MrgCJ!aL5eUg7tv@ E1-=IYhX4Qo literal 0 HcmV?d00001 diff --git a/q04_encoding/build.py b/q04_encoding/build.py index a52c57f..22a2a2c 100644 --- a/q04_encoding/build.py +++ b/q04_encoding/build.py @@ -1,3 +1,4 @@ +# %load q04_encoding/build.py # Default imports import pandas as pd from sklearn.preprocessing import LabelEncoder @@ -5,6 +6,22 @@ ny_housing = pd.read_csv('data/train.csv') housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] +def encoding(housing_data): + housing_data=ny_housing[['MasVnrArea','GrLivArea','LotShape','GarageType','SalePrice']] + mean=housing_data['MasVnrArea'].loc[housing_data['MasVnrArea'].notnull()].mean() + housing_data['MasVnrArea']=housing_data['MasVnrArea'].fillna(mean) + highly_occured=housing_data['GarageType'].loc[housing_data['GarageType'].notnull()].value_counts().index[0] + housing_data['GarageType']=housing_data['GarageType'].fillna(highly_occured) + + + housing_data[(housing_data['MasVnrArea']<=housing_data['MasVnrArea'].quantile(0.95)) & (housing_data['GrLivArea']<=housing_data['GrLivArea'].quantile(0.95)) & (housing_data['SalePrice']<=housing_data['SalePrice'].quantile(0.95)) ] + + + + housing_data['LotShape']=pd.DataFrame(housing_data['LotShape'].reshape(-1,1)).apply(LabelEncoder().fit_transform) + c=pd.get_dummies(housing_data['GarageType']) + return pd.concat([housing_data,c],axis=1) +c=encoding(housing_data) +c -# Write your code here: diff --git a/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09cc3f0409c5dff67a552fb00eb9a6af6da8f837 GIT binary patch literal 172 zcmXr!<>g}cS{}^+1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuSorj{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{<{9 literal 0 HcmV?d00001 diff --git a/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc b/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f260ed95409060b0813e34f4991c8dfb4e84cd2 GIT binary patch literal 1730 zcmaJ>O>f&q5Z&d6MA4FvIH_BsMMAX5L4aYUABPqwiWI49q)64G4pM+UFl+8erb3a* z?$W4)^rZZkqWAuhUU}_Ff1#(&ENxj%no{6!v^zUH@6Fr&wATyU-~Il}UmjzBv#qCv z{sVlqL?fBxIh*mAbMl?siCyVRFZX7C?AtLvZ_V0qn=^2yfo!dK+<}g4%RqKyD0e3Q zK8r)}A;g)TVhSA!&7=a=!WmZgZwm$(g zuJT-{iJn8c>`P(9&?u1=hp9e~!|#QDTBxrTh`4jC9%tv)4IY=~W?n! zO*tl@hcX8EKEzjlg2phzSNu2rl1&}szT}ecvlZ@uv~K+$J9}*962X|;8!1Y+CbPVI z^edt_AR{EAnn7W7bWa{c$Jsd)=nXvOW1PaivU*2o4ayw7s6<}7ITQn@_B7<<+9MkO zV5*B@|C|ob${7q*CW_N?@$u)wpG);z&qWGDq#uT35T;U)KrzV*0EG=n=Bk`x9m5y* zKg02+OGAScXls-0@x%F|-le%sWJ251-kR!UPQaIUxA{AKGx{2(w8~O?2c2z78$wB| z8~WVQ+Qq2l7aQgzj7623=v?F#=t!1PQJQEbOnMegERWYC2JRK7in=*)REQt7Lxx~9 zriWg=cfItBO!_9yHDEb{{pf$dUar^xroPj$w16XYP&=q1<4oPDH}xeqEz;DMVw}

#E>ED<4{G5(T&VWZpioF2Y8OrI+r@9edUG4A4g3JtXu2*KU*35uMjJZ9W&}x6 zAh$_Uhe( literal 0 HcmV?d00001