From 1d3f49fdcc1ef633aafa98abde7642c95cb668e7 Mon Sep 17 00:00:00 2001 From: rushabh-mehta Date: Tue, 1 Jan 2019 08:20:12 +0000 Subject: [PATCH] Done --- __pycache__/__init__.cpython-36.pyc | Bin 167 -> 168 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 180 -> 181 bytes q01_pipeline/__pycache__/build.cpython-36.pyc | Bin 1725 -> 2041 bytes q01_pipeline/build.py | 36 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 186 -> 187 bytes .../test_q01_pipeline.cpython-36.pyc | Bin 2014 -> 2015 bytes 6 files changed, 36 insertions(+) diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index c105ea65b389830d7486c11505a8fb006d05c448..af718bfd9de7fa52558705cd088ad23ada1515b9 100644 GIT binary patch delta 55 zcmZ3^xPpjy1+*JLd(&CK7qzv8M)Qplu{qX#v?BasN&5j#I5S|{7$K&zu+PmJLB%2UIz$95`6W|m9Vo8u72@*jhY6Xqj)4le1=Py0& zup5m}=DKf?GZIh0gK+CBC%*uPoT!?~MhgdQ&DYh{-PP4qU)9}K%Qddvc>YJtF#a;m z{ald0gfIIWfE%2I1|`L4(h7`bXhl_8jcjTYT(5++$e~UlTVXwNsawd^un{$Bvyknu z6}4%*kZWNl>e4PT1evXIhu67#YSNzQ@y516`=U2n=Zy*BP2K|i25<8Y@A5U?<9)u) zH~4^WPFy+=&eEWp$Gc+iwej^_Ll_gRzch6TYQ`E zoLbKyGpVk_LRt!m!)B#bn_b- z@F#*ve|qrKkm%+^HWuOi*iX2SFdzKDVxB|~5(#GxLY8I0VNk5B`$?3}mGFGjdcTNZ zGF5P*B@^FcbKlE+I0TC7lSkkGc>mP}&J-Jc#o{9mB~K;g$vr=N`G(wtjg?HKnw+c% zcnedD9S51N%#yLLMN9^Yg}N^I+-E9CVqN3nkj+D-Ytv+&1@S~%VZuP~C$VC_(iP55 zbTxw6R6A)hSKzRi+%0~8ezB_KEvBxQ!3bUn-IT>!y-YDB{3-?wegpWBMF;R@Kn6n@ zGr|csdF9kxRP!pgwvC0IS7#=-Ak$TDgTBUVB^!7Hb6@6t;w~IjSsIJ_@ow&5rWbBr zpILb=cQMmALZ-icY7|;~Nfx%Mg4Qk>gPC457R^O#(N;Ea2p9x+b@I-vmN)X&(tK_% zx{EdCfEU25*Cy=Qf*n2N$;3of@Jo{l-{qJ26&SDZ_xM#9-+yf_`l_Dy@;<-DKRB%{ z*7Nn5n|JfIrAZ8s8+jAF*~r&bGjHb`OY_hKW?g^$ziE|4jEThX$p%6bU`S*OA#em6!9l>6tkCBb`Zu&_-3MX{352B0*n=7g zBBm0V(R##=JuXr;rQHZ>E@M#|ip&cIJ6s{Rh80?Ww_m#{lPvNkP>s~|Uk+Q^Ou2TX zVBCWO(G~~Cqy^`6>o8Cr6mpy$CNh%vK#9Z=KVB;Ft^sI7pO)>0{W;42CJ(HIV> z^9(Hn@jk7A1a9n8=cG{W$Fv4gp|T_B0^z>4#!$6I0yTN2*Zd@$M{$M~EQ&^+J1759ROF#Auj4Bk7#S}s8WpuqJbcgZ zhWrH9-gu)aRQ5>Dvnd--Z$)AXjpk`0kFu2cVuY=1bTDOMDB=lZ0Fv>D`BR{Z_YmyG z&^o|eG84WUy|}&ar9mo?NTcyQ2>D)m0^Q@S3JI5e1YnRhaY&bJ5u3PV0N)mA!n|kN zpm#{yY{T3o@-x^wyrS(CS_{kMhcMOEcpiaOGfwGdb_9fw@m>*WcwY!!0GZ0H7Wvya z+X7DpH+=yuIGz+#pxv`I5oaPAhoZTk+bUH$m#>weH~xBmF^H_`JM`-5RJSG_9vQ`YYF*f*0CGsgmDn2^=zRzv&bu@(Z%RDY5DEz(+~la%Sss zkxsK*TVI-4iMw|4()z`40QNp*Mq~OC`}n0Ijq(^|pR1cV_F3OiG?Ti8 zko@Cfwy*kVi)K*_SHiRP*Pw=l2I1%7q!?-uoE54F@(>lBWXO)vaSfBH)vRYoex z!ZBXCYU}-)!o4$f2f?~nXWK3n$Rswz{xDLs9h6G_ISC-it&avyXX~?52O8w4WhnX7 zdNj50XP6GM62c0u(*ljtD$;VAE*EnGEYT)cpQ63|7hC_D^-H8aLLrvY*09RuOzmP= zZ0VPHxm3PiEfs1=QAJ6SD+i<1ImE{45L_M)5Cgsz`FJJc?r98i4cRaSMVyP?se5XY zRPUcHk)j)J*G>myU{n(8iWA#V5Xlgq5aA8+X|W!WR`K}1)WS+A=vPH)`Exa(P2MHA zM?lYw^{09AY6`-AW9|{$R}UygWSrrSu9I33cz{Qrrtpxor-{V|s=SL7RzIiXI;i%5 Sd|sm!I27Af4t9 diff --git a/q01_pipeline/build.py b/q01_pipeline/build.py index 96beca7..4eaaf5d 100644 --- a/q01_pipeline/build.py +++ b/q01_pipeline/build.py @@ -1,3 +1,4 @@ +# %load q01_pipeline/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split, GridSearchCV @@ -5,9 +6,44 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.utils.class_weight import compute_class_weight from sklearn.metrics import roc_auc_score +from imblearn.over_sampling import SMOTE bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',') +y = bank['y'] +X = bank.drop(['y'], axis=1) + +columns_to_encode = ['job','marital','education','default','housing','loan','contact','day','month','poutcome'] +lbe = LabelEncoder() +for column in columns_to_encode: + X[column] = lbe.fit_transform(X[column]) +y = lbe.fit_transform(y) +X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=9) +model = [RandomForestClassifier()] + # Write your solution here : +def pipeline(X_train1,X_test1,y_trai1n,y_test1,model1): + bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',') + y = bank['y'] + X = bank.drop(['y'], axis=1) + columns_to_encode = ['job','marital','education','default','housing','loan','contact','day','month','poutcome'] + lbe = LabelEncoder() + for column in columns_to_encode: + X[column] = lbe.fit_transform(X[column]) + y = lbe.fit_transform(y) + X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=9) + smote = SMOTE(random_state=9, kind='borderline2') + X_sampled_train,y_sampled_train = smote.fit_sample(X_train,y_train) + rfc = RandomForestClassifier(class_weight={0:1,1:2},random_state=9) + param_grid={ + 'criterion' : ['gini','entropy'], + 'n_estimators' : [50,75,100], + 'max_depth' : [6,8,10], + 'min_samples_leaf' : [10,15,20] + } + grid_search =GridSearchCV(estimator=rfc,param_grid=param_grid,cv=5) + grid_search.fit(X_sampled_train,y_sampled_train) + y_pred = grid_search.predict(X_test) + return grid_search,roc_auc_score(y_test,y_pred) diff --git a/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc b/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc index 9a74c788dfa8cb2d8c0635c15b23112acb858577..6fd03d708b37d70572b4911b489fc2652cef0e82 100644 GIT binary patch delta 55 zcmdnRxSNr~n3tDJ_NivfL=JQ14E>D!+*JLd(&CK7qzv8M)Qplu{qX#v?BasNG~P@xvBc;MX8mECHcAfF8Rr&xv6<2#rgq7`B|yS JCB+kyA_43r6I1{I diff --git a/q01_pipeline/tests/__pycache__/test_q01_pipeline.cpython-36.pyc b/q01_pipeline/tests/__pycache__/test_q01_pipeline.cpython-36.pyc index 1a428fbcf2e081f515235825226995c5adaec811..75d0d8cbc58208e30dcb18efbd4144f2a273fdd8 100644 GIT binary patch delta 264 zcmcb|f1jVjn3tDJ_NivfMvh`e#;DCTj4X^AA`A=+zY6s;@^e%5i%N?#5|c7?b5k=) z67|FLi?WLg5|dN)lk-zjH!o*WW|R>Cs@7yG5&}|1!XN^q9YTmse#ZP$04#$fxcNN` z3!}I+NE;(W8=0)&e>fCL94 p2P+2~2YV6!