From ec9c03b50482cb0081c4fa34e0a46f646a3fbdfe Mon Sep 17 00:00:00 2001 From: sonal7930 Date: Wed, 20 Jun 2018 14:20:45 +0000 Subject: [PATCH 1/2] Done --- __pycache__/__init__.cpython-36.pyc | Bin 0 -> 151 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 163 bytes q01_bagging/__pycache__/build.cpython-36.pyc | Bin 0 -> 1652 bytes q01_bagging/build.py | 22 +++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 169 bytes .../test_q01_bagging.cpython-36.pyc | Bin 0 -> 960 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 168 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1648 bytes q02_stacking_clf/build.py | 45 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 174 bytes .../test_q02_stacking_clf.cpython-36.pyc | Bin 0 -> 2382 bytes 11 files changed, 67 insertions(+) create mode 100644 __pycache__/__init__.cpython-36.pyc create mode 100644 q01_bagging/__pycache__/__init__.cpython-36.pyc create mode 100644 q01_bagging/__pycache__/build.cpython-36.pyc create mode 100644 q01_bagging/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc create mode 100644 q02_stacking_clf/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_stacking_clf/__pycache__/build.cpython-36.pyc create mode 100644 q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f7bce8a9344d7f94f18cdcfe73b307b03811254 GIT binary patch literal 151 zcmXr!<>eCGcP5Gf2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUoQF?`MIh3#rb)O zIp&te2KwRoMcKs#iOH$@$@wX%`l)%vskuoxsqwj~B^miC#qkA2`B|ySCB^#j@tJv< aCGqik1(mlrY;yBcN^?@}K&BT1F#`Zx9wt}- literal 0 HcmV?d00001 diff --git a/q01_bagging/__pycache__/__init__.cpython-36.pyc b/q01_bagging/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b49516031831584403b4ca30506ff60f669d9a74 GIT binary patch literal 163 zcmXr!<>eCGcP5Gf2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUjh0V`MIh3#rb)O zIp&te2KwRoMcKs#iOH$@$@wX%`l)%vskuoxsqwj~B^miC#qkA2`B|ySCB^!M28QuT miRtN?dFlG`@tJv%WfMt6eT%Njb%x;6F-sy1&VCcg_YEcqCo2O5p-8?YM=pIU_cEic@&OjLQ-jM z$Sc{EyU0frUG)$8A6*ofZPA5(p_}%Sk`f0k$^kFQbNRSD=MIlHH(``^q1g#JLc zN(lW=pz2@2aKy1h6hliSDLm?7i+R#70vfo!FT*0Dk?Ug_(^$l#1`lRtOg^|FcHjieQc0^~i#WBa%1Z?}749EeZ+qck;Lw1BGw#5!_@Pu!?^1t=z9eS7U z(mk;+4tUI)GXlHas&kXE$ znCX`b3?~ndT>0(Eq5-2Wp^aq#`Gu&Pij8?$q}nhe79W5=0})(_z}mhCRsR5Hh%SRU zn&Y`Q_Xj@EM6Ti=i9vHRz}(~hRc!E#T!w~>ylFVY(DO&ZGz7xYHPFd}19WQ?9-tY) z=zr%#ujhm_WJLrzo5CWP)rr7XoP_M#EJjEk&gKs>09Q3^# z1i=yG-;gPRfIWv{zdz;q%Cgc8Xmgx`h@USnGdXxfTjx&L6hNyTO`Ps7k!bst&!!jMSUK&W>(}8kH?NMMwNM?NuK;K*@C*9u z%idX8h@LLTOn&&$(Zk-?rFyO_mWf_ga?ulGEsDMrX(7y6$u&e%jzng3@5RGMHY%H^ zz5XPZ{IEK&TkGKUq_2h9g`4RP7=)WRaWx?cPRPZ(tBHq?WtlOlj}PBo`C4hKIDzVv zwnW~5$}{_S;S<${aTlv(*=#}MAZ3rNGgOP@cE)M>6>Uxf{=O~+23wv5O zs=b0$b@-CWiO@BvIBh(If9(s!3Q>ENG}NzT309w#-*qF^wCgvlX<4&rjg6}g203tB zN01W}Y4kGYU^&$-a$&pqZl#MoS_OIsc32VU*~`)9+H2tDS!QHs4Xp)t*-8`0b9+sh_o+A0 zf^NM5KySxDxNOy)-MYQr2l7SbnRdxdV$`(O<$iPXeCGcP5Gf2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUt#(g`MIh3#rb)O zIp&te2KwRoMcKs#iOH$@$@wX%`l)%vskuoxsqwj~B^miC#qkA2`B|ySCB^!M28QuT riRtN?dFlEksl`B%`1s7c%#!$cy@JYH95%W6DWy57b|CwTftUdRe3vZy literal 0 HcmV?d00001 diff --git a/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c9519b65967d72b46c0dc95dce53c6f9973f574 GIT binary patch literal 960 zcmZ`%&2H2%5VrHP8|_a85<(y$$b~~B)RguBl@O|^s^0bj5>gIW+1Q)i;v~DaQ^Bg7 zwlBj=@B}=8ublQ3I5Cs`X?noXj3@Tz`DQ-*WH{_S{PjCoIE4Hmt>Hm@2BzPF8mJM{9jAFdEAtGG>;~?gf$3-91W70-iNh&(Zb{-IJDE5D<%^4N zP_z*|g!=13ltzay`K$19b}dSXB2T%1QwX;GRBNHk#iCB7b)_gH$9h`GiS@AipF|A+ z_V-IXtqKwAs!ZjZ)03n4bEUrOd76nhtGI|osYNlCf)&C{E3VmGRaa1iju%I-*;cpM zK>ND!thIbN|857kwZnCRQRDRz8qnpFmS~4+3q7E_fA5Emlrvz2b1cfL&Y|32$6l%E}Cr< zA@Hup@P9q-;G@X50b^xa2*zxWv7+KN$j$74F}%CYjygc<2Ydeem8cGEpf$t+4ZLP$ z6m@M-motNPDtwN%J1gI#(nl6KPo08t+t!QK!Q25+`=wazW literal 0 HcmV?d00001 diff --git a/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a47c2b0a82a9f0726c2593cf3daad8c864263d37 GIT binary patch literal 168 zcmXr!<>eCGcP5Gf2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU!nRL`MIh3#rb)O zIp&te2KwRoMcKs#iOH$@$@wX%`l)%vskuoxsqwj~B^miC#qkA2`B|ySCB^!M21fD4 rC5g$|nR)5)$vJ8I@$s2?nI-Y@dIgoYIBatBQ%ZAE?LgKQ12F>tQA;ec literal 0 HcmV?d00001 diff --git a/q02_stacking_clf/__pycache__/build.cpython-36.pyc b/q02_stacking_clf/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e5de6a762074005b24e9bbc3eaac948d3e23527 GIT binary patch literal 1648 zcmah|OK&4Z5T5Swylm%fJI`GfZq6azO(YP~3J@D{LVH+N%Blg4#?xhIoOz|YgOZhf zGJ7;X0|_L44Y$5>%3t6D)nhv(D-hl4>gunmx~i+HPgYjyzx5Bl{nfXuKdgHL&(;^X ziOQotrk<3N%Km z+?x`%25X~r?sE4XMY{El4(P7MHtwOV3fTk#8?ea(UgwQj?YkP=VlCEY9dHn7&IsEE zd%^8gT%E7XDBA@OcK9mJHI%OL^_k1|5Zita+ed8oJ?sGXU>^>KHhc6R@wWdZ-p>ET z`xv%HA51Za@EBTmT>Fme%sIZnH-EI)6V!SFk6;Vh(1Om?F_zr@#_De0nnHDnURNSY z@=$?Pp)AuxA=WvCIFU(_zZL*5(n!i=kbpqR=GVnAkt&H_!B7C=MULpkm(g&TJgLU-pWg;1Fsg%jNXc8! zO%dh1$U>r;XibzTY&qwNw_i>?zX%S2s2RU1(zo95yVRNE}tHoE{QL&8;!GG|0+rOQF)`A_u)hIA`g&Q z)F*Y?BpuSEbzFFw^LP?Q;r1^a;t_KHknV?iTVA9PiTtR_M<^jh_t?&Yq9*!UNEQAc zVH>EmgwiBO!?3ceQmxmQqUE!MX+^fSgz=FgiRHW!Zi!zZl48TCIQdnEPosIMeE|^< z@iXbbe6xY6vaTg*5o_{BlN-@S1F>yL$B-Q)x`r8gR?*{%p6F(O{*z7ao;^4nKRBI; zmbt(wMUpG5-g#6)m9MIR_sG6c^Pl5$uTuETWJf+n(j?326W=yD-=sUZ&uh>mKFV>? M`cj6HYEr-bH>bF}y8r+H literal 0 HcmV?d00001 diff --git a/q02_stacking_clf/build.py b/q02_stacking_clf/build.py index 7b1c5f8..5f2bbbd 100644 --- a/q02_stacking_clf/build.py +++ b/q02_stacking_clf/build.py @@ -15,4 +15,49 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) # Write your code here +clf1 = LogisticRegression(random_state=9) +clf2 = DecisionTreeClassifier(random_state=9) +clf3 = DecisionTreeClassifier(max_depth=9, random_state=9) +bagging_clf1 = BaggingClassifier(clf2, n_estimators=100, max_samples=100, + bootstrap=True, random_state=9, oob_score=True) +bagging_clf2 = BaggingClassifier(clf1, n_estimators=100, max_samples=100, + bootstrap=True, random_state=9, oob_score=True) +bagging_clf3 = BaggingClassifier(clf3, n_estimators=100, max_samples=100, + bootstrap=True, random_state=9, oob_score=True) + +model = [bagging_clf1, bagging_clf2, bagging_clf3] + +#Actual function call +dataframe = pd.read_csv('data/loan_prediction.csv') +X = dataframe.iloc[:, :-1] +y = dataframe.iloc[:, -1] + +X_train,X_test,y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=9) +print (X_train.shape) +print (X_test.shape) +print (y_train.shape) +print (y_test.shape) + +def stacking_clf(model, X_train,y_train, X_test, y_test): + + x_train_mdl = pd.DataFrame() + for mdl in model: + + mdl.fit(X_train, y_train) + x_train_mdl = pd.concat( [x_train_mdl, pd.DataFrame( mdl.predict_proba(X_train))] + ,axis=1) + + mdl_clf = LogisticRegression(random_state=9) + mdl_clf.fit(x_train_mdl,y_train) + + x_test_mdl= pd.DataFrame() + for mdl in model: + x_test_mdl = pd.concat( [x_test_mdl, pd.DataFrame( mdl.predict_proba(X_test))] + ,axis=1) + + y_pred = mdl_clf.predict(x_test_mdl) + + score = accuracy_score(y_test, y_pred) + return float(score) +print(stacking_clf(model,X_train,y_train,X_test,y_test)) diff --git a/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..912148499205703855e00c2930569fb2adf1f208 GIT binary patch literal 174 zcmXr!<>eCGcP5Gf2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU(xy*`MIh3#rb)O zIp&te2KwRoMcKs#iOH$@$@wX%`l)%vskuoxsqwj~B^miC#qkA2`B|ySCB^!M21fD4 wC5g$|nR)5)$vJ8IC8@yv&mLc)fzkTO2mI`6;D2sdgYsi-DK{0DRmp8UO$Q literal 0 HcmV?d00001 diff --git a/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48ff484f8a166c69be0e6c332e76dc4500cbf82 GIT binary patch literal 2382 zcma)7&2HO95ayDUM9GpP`QQ07(>5p+q*h`diU38EHc5j3tqUXu8Y~JhG9^Z|Szx+mj z_AKi!YwE&x>mEMRCzi!5M!1y_N{Gd5=5RZ4sDrx2T<#_%S~7Zxmy-&u7`@EBq)Mws zukczjLuZWc@p{sr4Wn22EWH31_L{6VBD4jqy*XB8)wedvrMGt1UbX1_ly%uy7N7$2 zu)yl9!DbKL?_Ih`m*_HGff=-1I3#oxsuS&^(Hg9IXwys3fQzh!_Z-g7vH8O?y^OZi zGqx*eyL85O6)wXSxY~2*+9QiCu*EkPTLdz4sL!fTt@g@TL)Rv`w+&L=4<(H4!cdng zj9$cPFNk;-9cFu=x&zKb(UX0M(9%>QjMG42U?BTER%ly#08uRCEZr6W_jxE~+>Jrt z%-q*mFP18foJR-ln;J zzdNq#L~U9v5{QS8*F~7JED11m1#Keg6OSZ(9WdytU1J$DG;g;{w4Mf-dz^$S6Ozto zFBv9%4w6=PvP?-_VBdt$YL@K;GRg!*7<_zbtnT3>Z=wK;5@u1G5oW)!sKcDe=&};N zyR6JAIF^`)z*qC8M*Qg%$dO*2zVuf$jql?lH&7@`krDZU99Vm{at;V1tJVnrqW^I6 z-nEykks&vJ-`x`WbK??<;o8I3STY67$Lb9dNR{-jvzz{-_!Xq6wOyh8go`4d!J>gs z4_^*Ko;w`Uww=2YcsF--?SHbw0=oUP+u6+$=*TP$`KNa_Z*{)Q#0%LEBj`jKgASw; zk{u2~0%|v75^I_5VG^?Q^45l~&@s6>x`5Ki;OyLbe?M;(1)hpn!vv&G)*>DmesC7Z z^>YEIpw)o}+N-EcLt6lgdxL!pqq8-3&}(?-7(oWzaKM%SD&zx@KFj`!s^Q)WsH8 zxvO)TV3K=b6b(ce?VkZAzjCgmL6Lf(!#~!L>5j2T6B%KX;rjNO<~q-b=?}ac=8xVB zvu<(bj9h&hm}cw!@#8n%uYdl1S1e=D@WzvATv>)hIbjJLHKU{T}?8)_5Wci!nfMx+zWyfnGpne zEeNpw18njjuLptNF^U;OWN}UBr|CSEq)FFseHJHl2b#ip#B=d33vC_$i?P5Y7Hnd!HM{f5S^#p`0z*C7<^?B9R%|hLnXHia` Date: Sat, 7 Jul 2018 20:09:37 +0000 Subject: [PATCH 2/2] Done --- q01_bagging/build.py | 48 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/q01_bagging/build.py b/q01_bagging/build.py index f13c136..4276dce 100644 --- a/q01_bagging/build.py +++ b/q01_bagging/build.py @@ -1,9 +1,8 @@ +# %load q01_bagging/build.py import pandas as pd from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import BaggingClassifier -import matplotlib -matplotlib.use('Agg') import matplotlib.pyplot as plt from sklearn.metrics import accuracy_score @@ -16,25 +15,28 @@ # Write your code here -values = list(range(1,50)) -def bagging(X_train, X_test, y_train, y_test,n_est): - final=[] - def bagging_subset(n_est): - bagging_clf1 = BaggingClassifier(DecisionTreeClassifier(), n_estimators=n_est, max_samples=0.67,max_features=0.67, +def bagging(X_train, X_test, y_train, y_test,n_est = 10): + decision_clf = DecisionTreeClassifier() + + # Fitting single decision tree + decision_clf.fit(X_train, y_train) + y_pred_decision = decision_clf.predict(X_test) + score_dt1 = accuracy_score(y_test, y_pred_decision) + + + # Fitting bagging classifier with DecisionTreeClassifier + bagging_clf1 = BaggingClassifier(decision_clf, n_est, max_samples=0.67,max_features=0.67, bootstrap=True, random_state=9) - bagging_clf1.fit(X_train, y_train) - y_pred_bagtest = bagging_clf1.predict(X_test) - score_test = accuracy_score(y_test, y_pred_bagtest) - - y_pred_bagtrain = bagging_clf1.predict(X_train) - score_train = accuracy_score(y_train, y_pred_bagtrain) - #print n_est1, score_test, score_train - return (n_est, score_test, score_train) - - results = map(lambda n_est : bagging_subset(n_est), values) - results = pd.DataFrame(results) - - plt.plot(results[:][0],results[:][1]) - plt.plot(results[:][0],results[:][2]) - plt.show() - return + + bagging_clf1.fit(X_train, y_train) + y_pred_bagging = bagging_clf1.predict(X_test) + score_bc_dt = accuracy_score(y_test, y_pred_bagging) + + return plt.plot(n_est,score_bc_dt) + + + + +print(bagging(X_train, X_test, y_train, y_test,n_est = 10)) +plt.show() +