def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of bagged decision trees versus ensemble size.

    For every ensemble size in ``range(1, n_est)`` a ``BaggingClassifier``
    wrapping a ``DecisionTreeClassifier`` is fitted on the training split and
    scored with ``accuracy_score`` on both the test and the training split.
    Both accuracy curves are then drawn on a single figure.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training labels.
        y_test: Held-out labels.
        n_est: Exclusive upper bound of the ensemble sizes to try. Passing
            ``51`` reproduces the sweep (1..50 estimators) that used to be
            hard-coded inside the function.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Honour the caller's ``n_est``; it used to be overwritten with 51.
    estimators = range(1, n_est)
    decision_clf = DecisionTreeClassifier()

    # Per-call accumulators. Module-level lists kept growing across calls,
    # so a second call plotted mismatched x/y lengths and failed.
    test_scores = []
    train_scores = []

    for est in estimators:
        bagging_clf = BaggingClassifier(
            decision_clf,
            n_estimators=est,
            max_samples=0.67,
            max_features=0.67,
            bootstrap=True,
            random_state=9,
        )
        bagging_clf.fit(X_train, y_train)

        # Test curve: accuracy on the held-out split.
        test_scores.append(accuracy_score(y_test, bagging_clf.predict(X_test)))
        # Train curve: accuracy on the data the ensemble was fitted on.
        train_scores.append(
            accuracy_score(y_train, bagging_clf.predict(X_train))
        )

    plt.figure(figsize=(10, 6))
    plt.title('Bagging Info')
    plt.xlabel('Estimators')
    plt.ylabel('Scores')
    plt.plot(estimators, test_scores, 'g', label='test line', linewidth=3)
    plt.plot(estimators, train_scores, 'c', label='train line', linewidth=3)
    plt.legend()
    plt.show()
def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Stack base classifiers under a logistic-regression meta-learner.

    Each estimator in ``model`` is fitted on the training split. Its
    ``predict_proba`` outputs for the training and test splits become
    meta-features, concatenated column-wise across all estimators. A
    logistic regression is fitted on the training meta-features and scored
    on the test meta-features.

    Args:
        model: Iterable of unfitted classifiers exposing ``fit`` and
            ``predict_proba``. Any non-zero number of estimators is
            accepted.
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.

    Returns:
        Accuracy of the stacked predictions on the test split, as returned
        by ``accuracy_score``.

    Raises:
        ValueError: If ``model`` contains no estimators.
    """
    base_models = list(model)
    if not base_models:
        raise ValueError("model must contain at least one base estimator")

    train_blocks = []
    test_blocks = []
    for base_clf in base_models:
        base_clf.fit(X_train, y_train)
        # NOTE: the training meta-features come from predictions on the same
        # rows the base model was fitted on, so they are in-sample.
        train_blocks.append(np.asarray(base_clf.predict_proba(X_train)))
        test_blocks.append(np.asarray(base_clf.predict_proba(X_test)))

    # Concatenate every block rather than indexing [0], [1], [2], so the
    # function works for any number of base models.
    X_bag_train = np.concatenate(train_blocks, axis=1)
    X_bag_test = np.concatenate(test_blocks, axis=1)

    # Use a fresh meta-learner with the same configuration as the
    # module-level ``clf1`` so that shared estimator is not refitted here.
    meta_clf = LogisticRegression(random_state=9)
    predictions_bag_final = meta_clf.fit(X_bag_train, y_train).predict(X_bag_test)
    return accuracy_score(y_test, predictions_bag_final)