# %load q01_bagging/build.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# NOTE(review): the diff hunks skip two lines of this file at this point;
# `plt` and `BaggingClassifier` are presumably imported on them -- confirm
# against the full file.
from sklearn.metrics import accuracy_score

plt.switch_backend('agg')

dataframe = pd.read_csv('data/loan_prediction.csv')
X = dataframe.iloc[:, :-1]
y = dataframe.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)


def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of bagged decision trees for 1..n_est estimators.

    For every ensemble size from 1 up to and including ``n_est`` a
    ``BaggingClassifier`` wrapping a ``DecisionTreeClassifier`` is fitted on
    the training data, and its accuracy on the training and test sets is
    recorded. Both accuracy curves are then drawn on the current matplotlib
    figure.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Largest number of estimators to evaluate.

    Returns:
        None. The result is the plot drawn through ``plt``.
    """
    # The x axis is derived from n_est so it always has exactly one point per
    # recorded accuracy; a fixed range(1, 51) only matched when n_est == 50.
    estimator_counts = range(1, n_est + 1)
    accuracy_test, accuracy_train = [], []

    for n_estimators in estimator_counts:
        # The base estimator is passed positionally, as in the original call.
        bagging_clf = BaggingClassifier(
            DecisionTreeClassifier(),
            n_estimators=n_estimators,
            random_state=9,
            bootstrap=True,
            max_samples=0.67,
            max_features=0.67,
        )
        bagging_clf.fit(X_train, y_train)
        accuracy_test.append(accuracy_score(y_test, bagging_clf.predict(X_test)))
        accuracy_train.append(accuracy_score(y_train, bagging_clf.predict(X_train)))

    plt.plot(estimator_counts, accuracy_test, label='test', color='blue')
    plt.plot(estimator_counts, accuracy_train, label='train', color='red')
    plt.xlabel('n-estimators')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()


# %load q02_stacking_clf/build.py
# Default imports
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# NOTE(review): the diff hunks skip three lines of this file at this point;
# `LogisticRegression`, `BaggingClassifier` and `accuracy_score` are
# presumably imported on them -- confirm against the full file.
import pandas as pd
import numpy as np

dataframe = pd.read_csv('data/loan_prediction.csv')
X = dataframe.iloc[:, :-1]
y = dataframe.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

clf1 = LogisticRegression(random_state=9)
clf2 = DecisionTreeClassifier(random_state=9)
clf3 = DecisionTreeClassifier(max_depth=9, random_state=9)

bagging_clf1 = BaggingClassifier(clf2, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)
bagging_clf2 = BaggingClassifier(clf1, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)
bagging_clf3 = BaggingClassifier(clf3, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)
model = [bagging_clf1, bagging_clf2, bagging_clf3]


def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Fit the base models, then score the module-level meta-classifier.

    Each estimator in ``model`` is fitted on the training data and its
    predictions for ``X_train`` are collected end to end. The module-level
    ``clf1`` is then fitted on one copy of ``X_train`` per base model, using
    those collected predictions as targets, and scored on the test set.

    NOTE(review): the meta-classifier is trained on the raw features to
    reproduce the base models' predictions; conventional stacking would feed
    the base predictions in as features with ``y_train`` as the target. The
    scheme is kept as-is so the returned score does not change for existing
    callers.

    Args:
        model: Non-empty list of estimators exposing ``fit`` and ``predict``.
        X_train: Training features as a pandas DataFrame.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Accuracy of ``clf1`` on ``X_test`` against ``y_test``.

    Raises:
        ValueError: If ``model`` is empty.
    """
    if not model:
        raise ValueError("model must contain at least one estimator")

    stacked_train_predictions = []
    for base_model in model:
        base_model.fit(X_train, y_train)
        stacked_train_predictions.extend(base_model.predict(X_train))

    # One copy of the training features per base model keeps the rows aligned
    # with the concatenated predictions for any number of models.
    # DataFrame.append is gone from pandas 2.0, so pd.concat is used; for
    # three models it yields the same rows in the same order as the old
    # nested append calls.
    X_train_stacked = pd.concat([X_train] * len(model))

    clf1.fit(X_train_stacked, stacked_train_predictions)
    return accuracy_score(y_test, clf1.predict(X_test))