From a29865e0b7aa9d84141e00ac41330533e3f975fd Mon Sep 17 00:00:00 2001 From: AdityaMV1215 Date: Tue, 29 May 2018 09:17:55 +0000 Subject: [PATCH] Done --- __pycache__/__init__.cpython-36.pyc | Bin 167 -> 167 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 180 -> 180 bytes q01_pipeline/__pycache__/build.cpython-36.pyc | Bin 1725 -> 2260 bytes q01_pipeline/build.py | 40 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 186 -> 186 bytes .../test_q01_pipeline.cpython-36.pyc | Bin 2014 -> 2014 bytes 6 files changed, 40 insertions(+) diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index c105ea65b389830d7486c11505a8fb006d05c448..3e8a88601b967f913b486d2582539b5e296168ab 100644 GIT binary patch delta 47 zcmZ3^xSWy0n3tF9c^+HzL=HP?$CS*H%0%BVLnA{|{qX#v?BasN?(%jU%l4AXUqWrAX?(%jU%l4AXUqWrAX diff --git a/q01_pipeline/__pycache__/build.cpython-36.pyc b/q01_pipeline/__pycache__/build.cpython-36.pyc index 46b855109ab327414cb1a431fa5e80979d5e5542..779562a069defc23a8e4df2e2ef1589ef3edaf57 100644 GIT binary patch literal 2260 zcmaJ?OK;mo5ayDUM7EzzC! zc#YS21Gs&W6MeD8o4mz0&dt{*-KINqmtGNN;Iz*Py~;Ov=O>Nsaq>(XbT8mu+LM8v zHk4#;U@H+TJB~b8rIm-$<{xjB11ID{!o2g81w8a0gc9}~c`T0I zk(){S4<+NS2-LkWjFlUVKav_xAKZuSxo3~Rdyb&>C>*=6%y}xtpelC50Jv=@^rM*) zwu74OcfuV{6!5DubZjW-*0oK*zg_;PHaTB3YQ!9o%rnsasxIPv4~P~c0u5x zFg4k!8;=jO=kKpBAmG|Sr^V&H-S;A`$n08nte6r5B3rP+$pOJ<7rp{~NASgef=Z=L z2`60V#<@PvB{^>Pw0SYyFYUCH&NHq!5Ez$L&XA9LUQ@e(x6 z8wr^=RSB$Xo!-pOxDIEursbr8>lXw^(1D!hiUZ?a?HBT<0UD66!wohvWB_Wb3ctdy zo|E}zvN^57{*8tHTAz33-E7`W+Sz<--cz;2Oss`I?>yKSpx(o42s?M~89c1Akn{@#=S%^q2S7(@K4FvuE``jXLr;i813jpPav9f^qq zJH~~D1ovOy;(o~rWK!^+9Sc#4Dy{8amGcYCN$>;>flZ`9Nx@^JGW?I zlv(Fv~ad&06+#@j0-O{kD=IEk#T+-yTEHqE;~Tg>)V8G@$=HU2&NDZ&nNg zF$t9|0&02K+5n}-TX3PNgdfp{$XXq@`DkeXtzYJ#CAuf5wKDI3qM^V&pnB}n@+%vH zyV_VjyEG}SB(Rj0RsvL(kF#D&^Jm}?!KndV?srWdf?GcPIGlvO7=FWDb;cgQIJkLm zd-yVxCvn6aF~p8MJen}i6Tuh`g+Bh6ITJS!_9M_4KnF%2Pa&eiH=iHaksApQq{HKx z>+${QEVb4wOzHRwAR4I=3%*ZCgS1GWR7jhY$PVd{zHSneTk+bUH$m#>weH~xBmF^H_`JM`-5RJSG_9vQ`YYF*f*0CGsgmDn2^=zRzv&bu@(Z%RDY5DEz(+~la%Sss zkxsK*TVI-4iMw|4()z`40QNp*Mq~OC`}n0Ijq(^|pR1cV_F3OiG?Ti8 zko@Cfwy*kVi)K*_SHiRP*Pw=l2I1%7q!?-uoE54F@(>lBWXO)vaSfBH)vRYoex z!ZBXCYU}-)!o4$f2f?~nXWK3n$Rswz{xDLs9h6G_ISC-it&avyXX~?52O8w4WhnX7 zdNj50XP6GM62c0u(*ljtD$;VAE*EnGEYT)cpQ63|7hC_D^-H8aLLrvY*09RuOzmP= zZ0VPHxm3PiEfs1=QAJ6SD+i<1ImE{45L_M)5Cgsz`FJJc?r98i4cRaSMVyP?se5XY zRPUcHk)j)J*G>myU{n(8iWA#V5Xlgq5aA8+X|W!WR`K}1)WS+A=vPH)`Exa(P2MHA zM?lYw^{09AY6`-AW9|{$R}UygWSrrSu9I33cz{Qrrtpxor-{V|s=SL7RzIiXI;i%5 Sd|sm!I27Af4t9 diff --git a/q01_pipeline/build.py b/q01_pipeline/build.py index 96beca7..7e26707 100644 --- a/q01_pipeline/build.py +++ b/q01_pipeline/build.py @@ -3,11 +3,51 @@ from sklearn.model_selection import train_test_split, GridSearchCV from sklearn.preprocessing import LabelEncoder from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import GradientBoostingClassifier +from sklearn.feature_selection import RFE +from imblearn.over_sampling import SMOTE +from sklearn.linear_model import LogisticRegression from sklearn.utils.class_weight import compute_class_weight from sklearn.metrics import roc_auc_score bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',') +y = bank['y'] +X = bank.drop('y', axis=1) +X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=9) +model = GradientBoostingClassifier(random_state=9) # Write your solution here : +def pipeline(X_train,X_test,y_train,y_test,model): + bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',') + y = bank['y'] + X = bank.drop('y', axis=1) + param_grid = {'n_estimators':[10,20,30], 'max_features':[2,4,6,8,12,16], 'max_depth':[2,4,6,8]} + numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] + num_features = X_train.select_dtypes(include=numerics).columns.tolist() + cat_features = list(set(X_train.columns.tolist()) - set(num_features)) + onehot_en = ['marital','contact','month','job'] + label_en = list(set(cat_features) - set(onehot_en)) + for val in cat_features: + temp = X_train[X_train[val] != 'unknown'][val].mode()[0] + X_train.loc[X_train[val] == 'unknown',val] = temp + X_test.loc[X_test[val] == 'unknown',val] = temp + encoded_df_train = X_train + encoded_df_test = X_test + le = LabelEncoder() + for val in cat_features: + encoded_df_train[val] = le.fit_transform(encoded_df_train[val]) + encoded_df_test[val] = le.fit_transform(encoded_df_test[val]) + y_train1 = le.fit_transform(y_train) + y_test1 = le.fit_transform(y_test) + sm = SMOTE(random_state=9) + X_res_train, y_res_train = sm.fit_sample(encoded_df_train,y_train1) + X_res_test, y_res_test = sm.fit_sample(encoded_df_test,y_test1) + #model.fit(X_res_train,y_res_train) + search = GridSearchCV(model, param_grid) + search.fit(X_res_train,y_res_train) + y_pred = search.predict_proba(X_res_test)[:, 1] + auc = roc_auc_score(y_res_test,y_pred) + return search, auc +#print(pipeline(X_train,X_test,y_train,y_test,model)) diff --git a/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc b/q01_pipeline/tests/__pycache__/__init__.cpython-36.pyc index 9a74c788dfa8cb2d8c0635c15b23112acb858577..0454266e2285f709a24d2367cc6e4dd4e66ad2f9 100644 GIT binary patch delta 47 zcmdnRxQmg)n3tF9c^+HzL=HP?$CS*H%0%BVLnA{|{qX#v?BasNkXLKZ|Qf(V2fFiR99#s?($fw-6hNN_N6uyU|*uov-7 fzQLBos4&@?-I!5oayh%alsrr^hAERbuz$`J47(b8z;bIOT!NJJE%E89LUc^87 g23r=R;$&xbV@B!8