def bagging(X_train, X_test, y_train, y_test, n_est=10):
    """Fit a bagged decision-tree ensemble and plot its test accuracy.

    A ``BaggingClassifier`` built on ``DecisionTreeClassifier`` is fitted on
    the training split with ``n_est`` estimators, ``max_samples=0.67``,
    ``max_features=0.67``, bootstrapping enabled and ``random_state=9``.
    Its accuracy on the test split is then plotted against ``n_est``.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Number of estimators in the bagging ensemble.

    Returns:
        Whatever ``plt.plot`` returns for the single (n_est, accuracy) point.
    """
    bagging_clf1 = BaggingClassifier(
        DecisionTreeClassifier(),
        n_estimators=n_est,
        max_samples=0.67,
        max_features=0.67,
        bootstrap=True,
        random_state=9,
    )
    bagging_clf1.fit(X_train, y_train)
    score_bc_dt = accuracy_score(y_test, bagging_clf1.predict(X_test))

    # Only one point is plotted, so an explicit marker is required: the
    # default line-only style would leave the axes visibly empty.
    return plt.plot(n_est, score_bc_dt, marker="o")


print(bagging(X_train, X_test, y_train, y_test, n_est=10))
plt.show()
# Base learners; each one is wrapped in a bagging ensemble below.
clf1 = LogisticRegression(random_state=9)
clf2 = DecisionTreeClassifier(random_state=9)
clf3 = DecisionTreeClassifier(max_depth=9, random_state=9)

bagging_clf1 = BaggingClassifier(clf2, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)
bagging_clf2 = BaggingClassifier(clf1, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)
bagging_clf3 = BaggingClassifier(clf3, n_estimators=100, max_samples=100,
                                 bootstrap=True, random_state=9, oob_score=True)

model = [bagging_clf1, bagging_clf2, bagging_clf3]

# Actual function call: the last column is the target, 30% is held out.
dataframe = pd.read_csv('data/loan_prediction.csv')
X = dataframe.iloc[:, :-1]
y = dataframe.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


def _stack_probabilities(model, X):
    """Return every fitted model's ``predict_proba(X)`` output, column-wise."""
    # Collect the per-model frames first and concatenate once, rather than
    # growing a DataFrame inside the loop.
    return pd.concat(
        [pd.DataFrame(mdl.predict_proba(X)) for mdl in model],
        axis=1,
    )


def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Stack the given models under a logistic-regression meta-classifier.

    Every model in ``model`` is fitted on the training split. Their
    ``predict_proba`` outputs on the training split form the features for a
    ``LogisticRegression(random_state=9)`` meta-classifier, which is then
    scored on the same kind of features built from the test split.

    Args:
        model: Iterable of unfitted estimators exposing ``fit`` and
            ``predict_proba``; iterated more than once.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Accuracy of the stacked classifier on the test split, as a ``float``.
    """
    for mdl in model:
        mdl.fit(X_train, y_train)

    mdl_clf = LogisticRegression(random_state=9)
    mdl_clf.fit(_stack_probabilities(model, X_train), y_train)

    y_pred = mdl_clf.predict(_stack_probabilities(model, X_test))
    return float(accuracy_score(y_test, y_pred))


print(stacking_clf(model, X_train, y_train, X_test, y_test))