diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..dfa96bb Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_bagging/__pycache__/__init__.cpython-36.pyc b/q01_bagging/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4d55501 Binary files /dev/null and b/q01_bagging/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_bagging/__pycache__/build.cpython-36.pyc b/q01_bagging/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..8349f7a Binary files /dev/null and b/q01_bagging/__pycache__/build.cpython-36.pyc differ diff --git a/q01_bagging/build.py b/q01_bagging/build.py index 19f8726..6b43628 100644 --- a/q01_bagging/build.py +++ b/q01_bagging/build.py @@ -1,3 +1,4 @@ +# %load q01_bagging/build.py import pandas as pd from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier @@ -12,8 +13,19 @@ X = dataframe.iloc[:, :-1] y = dataframe.iloc[:, -1] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) - - -# Write your code here +def bagging(X_train, X_test, y_train, y_test,n_est): + score_dt =[]#create empty list to store scores for different value of estimators + score_dt1 =[] + array1=[10,20,30,40,50] + for i in [0,1,2,3,4]: + bagging_clf2 = BaggingClassifier(DecisionTreeClassifier(),n_estimators=array1[i],max_samples=0.67,max_features=0.67,bootstrap=True, random_state=9) + bagging_clf2.fit(X_train, y_train) + y_pred_decision=bagging_clf2.predict(X_test) + score_dt.append(accuracy_score(y_test, y_pred_decision)) + y_pred_decision1=bagging_clf2.predict(X_train) + score_dt1.append(accuracy_score(y_train, y_pred_decision1)) + plt.plot(array1[i],score_dt[i]) + plt.plot(array1[i],score_dt1[i]) + plt.show() diff --git a/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..e099f6a Binary files /dev/null and b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc new file mode 100644 index 0000000..8e7f374 Binary files /dev/null and b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc differ diff --git a/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..abd99ef Binary files /dev/null and b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_stacking_clf/__pycache__/build.cpython-36.pyc b/q02_stacking_clf/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..7fd9566 Binary files /dev/null and b/q02_stacking_clf/__pycache__/build.cpython-36.pyc differ diff --git a/q02_stacking_clf/build.py b/q02_stacking_clf/build.py index 7b1c5f8..8fc49c8 100644 --- a/q02_stacking_clf/build.py +++ b/q02_stacking_clf/build.py @@ -1,9 +1,11 @@ +# %load q02_stacking_clf/build.py # Default imports from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.linear_model import LogisticRegression from sklearn.ensemble import BaggingClassifier from sklearn.metrics import accuracy_score +from mlxtend.classifier import StackingClassifier import pandas as pd import numpy as np @@ -13,6 +15,44 @@ y = dataframe.iloc[:, -1] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) +LR=LogisticRegression(random_state=9) +DT1=DecisionTreeClassifier(random_state=9) +DT2=DecisionTreeClassifier(max_depth=9,random_state=9) +bagging_clf1 = BaggingClassifier(LR, n_estimators=100, max_samples=100, + bootstrap=True, random_state=9,oob_score=True) +bagging_clf2 = BaggingClassifier(DT1, n_estimators=100, max_samples=100, + bootstrap=True,oob_score=True) -# Write your code here +bagging_clf3 = BaggingClassifier(DT2, n_estimators=100, max_samples=100, + bootstrap=True, random_state=9,oob_score=True) + + + +def ModelUse(ModelToUse): + ModelToUse.fit(X_train, y_train) + y_pred_decision=ModelToUse.predict(X_test) + score=accuracy_score(y_test.reshape(-1,1),y_pred_decision) + y_pred_decision=y_pred_decision.reshape(185,1) + NewXtest=np.concatenate((X_test, y_pred_decision), axis=1) + y_pred_decision1=ModelToUse.predict(X_train) + y_pred_decision1=y_pred_decision1.reshape(429,1) + NewXtrain=np.concatenate((X_train, y_pred_decision1), axis=1) + return(NewXtest,NewXtrain) + +model=[bagging_clf1,bagging_clf2,bagging_clf3] + +def stacking_clf(model,Xtrain,y_train,Xtest,y_test): + stacking_clf = StackingClassifier(classifiers = model, + meta_classifier = LR) + stacking_clf.fit(NewXtrain, y_train) + y_pred2 = stacking_clf.predict(NewXtest) + accuracy = accuracy_score(y_test, y_pred2) + return(accuracy) + +NewXtest,NewXtrain=ModelUse(bagging_clf1) +stacking_clf(model,NewXtrain,y_train,NewXtest,y_test) +NewXtest,NewXtrain=ModelUse(bagging_clf2) +stacking_clf(model,NewXtrain,y_train,NewXtest,y_test) +NewXtest,NewXtrain=ModelUse(bagging_clf3) +stacking_clf(model,NewXtrain,y_train,NewXtest,y_test) diff --git a/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..ccb89b3 Binary files /dev/null and b/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc new file mode 100644 index 0000000..24f1c71 Binary files /dev/null and b/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc differ