def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of a bagged decision tree versus ensemble size.

    For every ensemble size from 1 to ``n_est`` (inclusive) a
    ``BaggingClassifier`` wrapping a ``DecisionTreeClassifier`` is fitted on
    the training split, and its accuracy on both splits is recorded. The two
    accuracy curves are then drawn on a single figure.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Largest number of estimators to evaluate; must be >= 1.

    Returns:
        None. The result is the displayed matplotlib figure.

    Raises:
        ValueError: If ``n_est`` is smaller than 1.
    """
    if n_est < 1:
        raise ValueError("n_est must be a positive integer")

    estimator_counts = list(range(1, n_est + 1))
    train_scores = []
    test_scores = []

    for count in estimator_counts:
        # Bagged decision trees; each tree sees 67% of the rows and 67% of
        # the columns. The fixed seed keeps the curve reproducible.
        bagging_clf = BaggingClassifier(
            DecisionTreeClassifier(),
            n_estimators=count,
            max_samples=0.67,
            max_features=0.67,
            bootstrap=True,
            random_state=9,
        )
        bagging_clf.fit(X_train, y_train)

        train_scores.append(
            accuracy_score(y_train, bagging_clf.predict(X_train)))
        test_scores.append(
            accuracy_score(y_test, bagging_clf.predict(X_test)))

    # Plain lists are passed to matplotlib: dict views are not sequences and
    # do not convert to arrays reliably.
    plt.plot(estimator_counts, train_scores, label='Train accuracy')
    plt.plot(estimator_counts, test_scores, label='Test accuracy')
    plt.xlabel('Number of estimators')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
--git a/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..7bec551 Binary files /dev/null and b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc new file mode 100644 index 0000000..1006be0 Binary files /dev/null and b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc differ diff --git a/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..60f931e Binary files /dev/null and b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_stacking_clf/__pycache__/build.cpython-36.pyc b/q02_stacking_clf/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..c73a452 Binary files /dev/null and b/q02_stacking_clf/__pycache__/build.cpython-36.pyc differ diff --git a/q02_stacking_clf/build.py b/q02_stacking_clf/build.py index 7b1c5f8..88b099c 100644 --- a/q02_stacking_clf/build.py +++ b/q02_stacking_clf/build.py @@ -1,11 +1,14 @@ +# %load q02_stacking_clf/build.py # Default imports from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.linear_model import LogisticRegression from sklearn.ensemble import BaggingClassifier from sklearn.metrics import accuracy_score +from mlxtend.classifier import StackingClassifier import pandas as pd import numpy as np +from sklearn.ensemble import VotingClassifier # Loading data dataframe = pd.read_csv('data/loan_prediction.csv') @@ -14,5 +17,37 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) +clf1 = LogisticRegression(random_state=9) +clf2 = DecisionTreeClassifier(random_state=9) +clf3 = DecisionTreeClassifier(max_depth=9, random_state=9) + +bagging_clf1 = 
def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Return the test accuracy of a stacked ensemble built from ``model``.

    The base estimators are first combined in a hard-voting ensemble; its
    predictions are appended to the feature matrices as one extra column.
    An mlxtend ``StackingClassifier`` over the same base estimators, with a
    logistic-regression meta classifier, is then fitted on the augmented
    training data and scored on the augmented test data.

    Args:
        model: Non-empty sequence of unfitted classifiers.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Accuracy of the stacked classifier on the test split.

    Raises:
        ValueError: If ``model`` is empty.
    """
    if len(model) == 0:
        raise ValueError("model must contain at least one classifier")

    # Neutral, unique names: the labels are only identifiers inside the
    # VotingClassifier and must not misdescribe the wrapped estimator.
    named_estimators = [
        ('model_{}'.format(index), estimator)
        for index, estimator in enumerate(model)
    ]
    voting_clf_hard = VotingClassifier(estimators=named_estimators,
                                       voting='hard')
    voting_clf_hard.fit(X_train, y_train)

    # NOTE(review): the training-side vote is predicted on the same rows the
    # ensemble was fitted on, so this extra feature is likely more accurate
    # on train than on test (possible target leakage) - confirm intent.
    train_votes = pd.DataFrame(voting_clf_hard.predict(X_train))
    test_votes = pd.DataFrame(voting_clf_hard.predict(X_test))
    X_train_aug = np.concatenate((X_train, train_votes), axis=1)
    X_test_aug = np.concatenate((X_test, test_votes), axis=1)

    stacker = StackingClassifier(
        classifiers=model,
        meta_classifier=LogisticRegression(random_state=9),
    )
    stacker.fit(X_train_aug, y_train)

    return accuracy_score(y_test, stacker.predict(X_test_aug))