n_est = 50


# Write your code here
def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot bagging accuracy against the number of estimators.

    For every ensemble size from 1 up to ``n_est - 1`` a
    ``BaggingClassifier`` of decision trees is fitted on the training split
    and its accuracy is measured on both the training and the test split.
    The two accuracy curves are drawn on a single figure, which is shown
    and then returned.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Exclusive upper bound on the ensemble sizes that are tried
            (sizes ``1 .. n_est - 1``).

    Returns:
        The matplotlib figure holding the train and test accuracy curves.
    """
    dt_clf = DecisionTreeClassifier()
    estimator_counts = list(range(1, n_est))
    acc_train = []
    acc_test = []

    for count in estimator_counts:
        # The base estimator is passed positionally on purpose: the keyword
        # was renamed from ``base_estimator`` to ``estimator`` in newer
        # scikit-learn releases, while the first positional slot is stable.
        bg_clf = BaggingClassifier(
            dt_clf,
            n_estimators=count,
            random_state=9,
            bootstrap=True,
            max_samples=0.67,
            max_features=0.67,
        )
        bg_clf.fit(X_train, y_train)
        acc_test.append(accuracy_score(y_test, bg_clf.predict(X_test)))
        acc_train.append(accuracy_score(y_train, bg_clf.predict(X_train)))

    # Create the figure only after all fits succeeded and draw on an explicit
    # axes, so the result does not depend on pyplot's "current figure" state.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(estimator_counts, acc_train)
    ax.plot(estimator_counts, acc_test)
    ax.legend(['Train Accuracy', 'Test Accuracy'])
    plt.show()
    return fig
lr = LogisticRegression(random_state=9)
dt1 = DecisionTreeClassifier(random_state=9)
dt2 = DecisionTreeClassifier(random_state=9, max_depth=9)

# The base estimators are passed positionally: the keyword was renamed from
# ``base_estimator`` to ``estimator`` in newer scikit-learn releases, while
# the first positional slot is stable across versions.
bg_clf1 = BaggingClassifier(lr, n_estimators=100,
                            max_samples=100, bootstrap=True,
                            oob_score=True)

bg_clf2 = BaggingClassifier(dt1, n_estimators=100,
                            max_samples=100, bootstrap=True,
                            oob_score=True)

bg_clf3 = BaggingClassifier(dt2, n_estimators=100,
                            max_samples=100, bootstrap=True,
                            oob_score=True)

models = [bg_clf1, bg_clf2, bg_clf3]


# Write your code here
def stacking_clf(models, X_train, y_train, X_test, y_test):
    """Stack the given classifiers and return the test accuracy.

    Every model in ``models`` is fitted on the training split. Their
    ``predict_proba`` outputs are concatenated column-wise to form the
    meta-features for the training and the test split. An unfitted copy of
    the first model is then trained on the training meta-features and used
    to predict the test labels.

    Args:
        models: Non-empty sequence of classifiers exposing ``fit`` and
            ``predict_proba``. They are fitted in place on ``X_train``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Accuracy of the stacked classifier on the test split.

    Raises:
        ValueError: If ``models`` is empty.
    """
    # Imported locally so the block does not depend on the file's import list.
    from sklearn.base import clone

    if not models:
        raise ValueError("stacking_clf requires at least one model")

    for model in models:
        model.fit(X_train, y_train)

    # Use every supplied model, not just the first three.
    X_bag_train = np.concatenate(
        [np.asarray(model.predict_proba(X_train)) for model in models],
        axis=1,
    )
    X_bag_test = np.concatenate(
        [np.asarray(model.predict_proba(X_test)) for model in models],
        axis=1,
    )

    # Train the meta-learner on a clone so the first base model keeps the fit
    # it received on the raw features, and no module-level global is needed.
    meta_clf = clone(models[0])
    meta_clf.fit(X_bag_train, y_train)
    predictions_bag_final = meta_clf.predict(X_bag_test)
    return accuracy_score(y_test, predictions_bag_final)


if __name__ == "__main__":
    # Guarded so importing this module does not train the whole ensemble.
    print(stacking_clf(models, X_train, y_train, X_test, y_test))