From 168f653d29528d4fe20fe4036f2e277dc2db54c0 Mon Sep 17 00:00:00 2001 From: HardikaB Date: Tue, 28 Aug 2018 13:39:51 +0000 Subject: [PATCH] Done --- __pycache__/__init__.cpython-36.pyc | Bin 167 -> 163 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 180 -> 176 bytes q01_pipeline/__pycache__/build.cpython-36.pyc | Bin 1725 -> 2065 bytes q01_pipeline/build.py | 52 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 186 -> 182 bytes .../test_q01_pipeline.cpython-36.pyc | Bin 2014 -> 2010 bytes 6 files changed, 51 insertions(+), 1 deletion(-) diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index c105ea65b389830d7486c11505a8fb006d05c448..26326a2c1a4ab6feb45944112ce39d9fe1537789 100644 GIT binary patch delta 50 zcmZ3^xR{Z{n3tDphg5v@L=JPg0R4>o+*Ey!#G;hU>_jL1@cg3e;)2BFRQ=@ql+=ld FZUA_K5K{mE delta 54 zcmZ3?xSWy0n3tF9MSNq_L=JPM5dDn&+*JMaqSVU7lKfnKm;B_?+|<01V*P-k{H)aE JlH!T6ZUE!Z6A=Ia diff --git a/q01_pipeline/__pycache__/__init__.cpython-36.pyc b/q01_pipeline/__pycache__/__init__.cpython-36.pyc index fbce7a136c76784e20d2a9bf4114845d8d5b6a32..47de9832504e68899da23a65ed33e4b7fae54e4b 100644 GIT binary patch delta 50 zcmdnOxPg(wn3tDphg5v@L=JPgSpAIr+*Ey!#G;hU>_jL1@cg3e;)2BFRQ=@ql+=ld F!2o|Y5QhK& delta 54 zcmdnMxP_6!n3tF9MSNq_L=JPMME#8X+*JMaqSVU7lKfnKm;B_?+|<01V*P-k{H)aE JlH!T6!2s(Z6GZ?3 diff --git a/q01_pipeline/__pycache__/build.cpython-36.pyc b/q01_pipeline/__pycache__/build.cpython-36.pyc index 46b855109ab327414cb1a431fa5e80979d5e5542..cc294d9aa087676020f5585012b36a26a71ed378 100644 GIT binary patch literal 2065 zcmZ`(OLN;c5C$lck|^qFTYjW=9?hdlQ>%F#ddPG-anknSBr|qfXV7#wGy%z!!G{4V zu`}dJ&CxUc8SOQf{(=4qUVHLi$faFSmaUmi$pNs~{T5hYzs19P-TG_I`T5T(LVu&V z&j9%wX!0FKh$4zPBDfeeQi4(AdSDPEFo}tky~N8wg;WYz=heU>Rv{a_7TCltWRur} z25A&B=1tN9zw&gQLic}bRKTxI+ChhO3S1TD3uGZ!B#Zx9*Wyb-mvjq#jV}i)WTlX8 zz8b8NwL-3ge1%-Wh~d#Ev_YG+b*hndR+%8OalFaaM^|Zkh-v2(kFJfbKSjT+kQ;1c z^eJ0sH&)REtMM`V4IiOB#0z+$@S&!WvYUFd;rOUq}a*yI&)LT1KNqN!J@>U{TKXei%6Gz6}PjdS^;nQ8_ z3h!X+rykDjZFisXXQ3BSCScy(aYGsf-$nv_wzw;$Kky67q8A16IAM;bw9Z@R4-XRX zsEf#R+_C3K51e{fXs|i#T^Zb;s`8c>g@dv5qtFT5MEJ*GZN`FmexWP;avSC@=d)Gv z&UQ5PW#W4~YzVO^By851hWBF#(eAoI%!|<4Q|-$)^Gaj)`O6=ky}Lr)#O*(E!$U_& zP7+NIu<6M+AMrg{vm{xZo6-J=dFni6KlJ#RvNIqtIs=X)P}-un7Dpy-!aPNE^({a% zp>08v8-P#H1Z75wQk-gOX{ZlzW+sqks-5DjoR&u#*tCiETFWX~H8n@2v;x?w!cH(o ziGGf?Qfm%d%j^PM1FU_4H2_l|;V zCP+_T>lw;$re&qHGcu>~K_ul=%^|u8o@iM+ZKth6Lwrb$QFVeag1M1)9>bWmjy~+6 zqu=*XVu7_b0kTtLbvmj|AkA`yqSK(+XX#ANnB`>gs#- zT&KR5h`PdB10N_{WIQ+BIA(xqfI&2swFFjCR#F*wrMz;(O7JMq&U(1}H)INd+`^1c)MF;f@a-yi#&ZpJ5h zWv03gJIQ;{A#CDC$-*t%Iqz*HYdY4kg*#ZyD#dSOaU1Yw|G+2ifYkdUH)ALfSLRww zb0ZuFu!R=JWLX}nM1-5gg*uXP2nmY0cqT*%e96)b0=a{jJyq@DQ0U6mn%P(=S+EcF zu8NPW&D7(>=W=uYNfN3e^X3c?FnDo1dCm^WTp(RYN-~t90zjb(Tv)2BK}Qy6e(JJZ z;S^B|TbY)9wZA&bSUo?}#H&CHf_5*g5j;be#tV|>6~SEUz_Ua2eRz&Ym8+-MVWCIr z(`#{PauPF{m%WIO1Bh@P_RY=djMP;L0Bu7#h(thipGTh9P?29%-8I#zToO$Th{lDu zp)@K_1)1i$TcFD;kKt@%&c0UfwfqV?mD?6%dHPq*$JU(A#tq=e3jAHrwloV{T1ER8 D3!*p0 literal 1725 zcmYjROOG2x5bmBCj~^L(on)WhWb=ktiq?6+Z50q3!UZBkB8Xan(RjM-9cNzYZYOc% zJsFPs50E(X3lP79TVJ{0zmN-5joE11>Tk+bUH$m#>weH~xBmF^H_`JM`-5RJSG_9vQ`YYF*f*0CGsgmDn2^=zRzv&bu@(Z%RDY5DEz(+~la%Sss zkxsK*TVI-4iMw|4()z`40QNp*Mq~OC`}n0Ijq(^|pR1cV_F3OiG?Ti8 zko@Cfwy*kVi)K*_SHiRP*Pw=l2I1%7q!?-uoE54F@(>lBWXO)vaSfBH)vRYoex z!ZBXCYU}-)!o4$f2f?~nXWK3n$Rswz{xDLs9h6G_ISC-it&avyXX~?52O8w4WhnX7 zdNj50XP6GM62c0u(*ljtD$;VAE*EnGEYT)cpQ63|7hC_D^-H8aLLrvY*09RuOzmP= zZ0VPHxm3PiEfs1=QAJ6SD+i<1ImE{45L_M)5Cgsz`FJJc?r98i4cRaSMVyP?se5XY zRPUcHk)j)J*G>myU{n(8iWA#V5Xlgq5aA8+X|W!WR`K}1)WS+A=vPH)`Exa(P2MHA zM?lYw^{09AY6`-AW9|{$R}UygWSrrSu9I33cz{Qrrtpxor-{V|s=SL7RzIiXI;i%5 Sd|sm!I27Af4t9 diff --git a/q01_pipeline/build.py b/q01_pipeline/build.py index 96beca7..78c9832 100644 --- a/q01_pipeline/build.py +++ b/q01_pipeline/build.py @@ -1,3 +1,4 @@ +# %load q01_pipeline/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split, GridSearchCV @@ -5,9 +6,58 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.utils.class_weight import compute_class_weight from sklearn.metrics import roc_auc_score - +import seaborn as sns +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import f1_score, confusion_matrix +from sklearn.metrics import precision_score, recall_score +from sklearn.metrics import roc_auc_score +from sklearn.linear_model import LogisticRegression +from imblearn.under_sampling import RandomUnderSampler +from imblearn.over_sampling import RandomOverSampler +from imblearn.over_sampling import SMOTE bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',') +label_enc = LabelEncoder() +for column in bank.select_dtypes(include=['object']).columns.values: + bank[column] = label_enc.fit_transform(bank[column]) +X_train, X_test, y_train, y_test = train_test_split(bank.iloc[:,:-1], + bank.iloc[:,-1], + random_state=9) + +rf = RandomForestClassifier(random_state=9) +lr = LogisticRegression(random_state=9) +model=[rf,lr] # Write your solution here : +def pipeline(X_train, X_test, y_train, y_test,model): + dict1=dict() + dataset=[[X_train, X_test, y_train, y_test]] + # Create the Under samplers + rus = RandomUnderSampler(random_state=9) + X_sample2, y_sample2 = rus.fit_sample(X_train, y_train) + dataset.append([X_sample2, X_test, y_sample2, y_test]) + + + ros = RandomOverSampler(random_state=9) + X_sample3, y_sample3 = ros.fit_sample(X_train, y_train) + dataset.append([X_sample3, X_test, y_sample3, y_test]) + + + smote = SMOTE(random_state=9, kind='borderline2') + X_sample4, y_sample4 = smote.fit_sample(X_train, y_train) + dataset.append([X_sample4, X_test, y_sample4, y_test]) + + roc_old=0 + roc_new=0 + for m in model: + for X_train, X_test, y_train, y_test in dataset: + m.fit(X_train, y_train) + roc_new=roc_auc_score(y_test, m.predict(X_test)) + if(roc_new>=roc_old): + dict1.clear() + dict1[m]=roc_new + roc_old=roc_new + return list(dict1.keys())[0],list(dict1.values())[0] diff --git a/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc b/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc index 9a74c788dfa8cb2d8c0635c15b23112acb858577..d989fffdaa29013d83eb75c477808a79aaea963c 100644 GIT binary patch delta 50 zcmdnRxQ&s+n3tDpmsEW8L=JPgWc`f%+*Ey!#G;hU>_jL1@cg3e;)2BFRQ=@ql+=ld FkpP0A5TO77 delta 54 zcmdnSxQmg)n3tF9n&Z5vi5%ui>G~P@xvBc;MX8mECHcAfF8Rr&xv6<2#rgq7`B|yS JCB+kCBLV9T6G{L8 diff --git a/q01_pipeline/tests/__pycache__/test_q01_pipeline.cpython-36.pyc b/q01_pipeline/tests/__pycache__/test_q01_pipeline.cpython-36.pyc index 1a428fbcf2e081f515235825226995c5adaec811..a533d876e61d494f8b7b878a009df6b9c226b612 100644 GIT binary patch delta 259 zcmcb|e~X{Pn3tDpmsEW8Mvh`e#;DCTj4X`GA`A=+zjE|5@^e%5JravjGP4t%^uzOu zvWp86lT-DR^HWkcFK1F_lo0?b)MP3W0#Zf7AOfTlLWoX&#{5$NEQ2Jt`8^8@qqsCk z8zV#;lH_DAwtjmtkO>kXLKZ|Qf(V2fFiR99#s?($fw-6hNN_N6uyU|*uov-7zQLBo bs4&@?-I!5oayh%alsrr^hAERbu(?(R delta 306 zcmcb`e~+KTn3tF9n&Z5vjU2^{jJGz|FtRYJi!v}U{3_7T$j?pHPcKTXOf1RI)pyBH zF3nBND=F3wD9X=DO)e?kyqHOuQAQA`Sd*zp7)TX~fC!Lo2q8B45%W(`undwQQ2Lhg zWJ6Zv$-=C6WMqI`O-6`zG({>vMWT}x*h=liK}Je~2ssd;1R@ZM!7MS57(b8z;bIOT z!NJJE%E89LUc^873R@PV;$(YvV=HN(L=