diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..1410954
Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_bagging/__pycache__/__init__.cpython-36.pyc b/q01_bagging/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..48c5282
Binary files /dev/null and b/q01_bagging/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_bagging/__pycache__/build.cpython-36.pyc b/q01_bagging/__pycache__/build.cpython-36.pyc
new file mode 100644
index 0000000..6992a87
Binary files /dev/null and b/q01_bagging/__pycache__/build.cpython-36.pyc differ
diff --git a/q01_bagging/build.py b/q01_bagging/build.py
index 19f8726..ca231a2 100644
--- a/q01_bagging/build.py
+++ b/q01_bagging/build.py
@@ -1,3 +1,4 @@
+# %load q01_bagging/build.py
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier
@@ -5,6 +6,7 @@
 import matplotlib.pyplot as plt
 from sklearn.metrics import accuracy_score
 plt.switch_backend('agg')
+import numpy as np
 
 # Data Loading
 dataframe = pd.read_csv('data/loan_prediction.csv')
@@ -12,8 +14,43 @@
 X = dataframe.iloc[:, :-1]
 y = dataframe.iloc[:, -1]
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)
-
+n_est = 1
 # Write your code here
+def bagging(X_train, X_test, y_train, y_test, n_est):
+    """Plot train and test accuracy of a bagged decision tree ensemble.
+
+    A BaggingClassifier over DecisionTreeClassifier is refit once for every
+    ensemble size from 1 to ``n_est`` (inclusive); both accuracy curves are
+    drawn on the current matplotlib figure. Nothing is returned.
+
+    Args:
+        X_train, y_train: data each ensemble is fitted on.
+        X_test, y_test: held-out data used for the second curve.
+        n_est: largest ``n_estimators`` value to evaluate.
+    """
+    # One x-axis shared by both curves, so it always matches the number of
+    # scores collected (the sweep used to be hard-coded to 50 and ignored n_est).
+    estimator_counts = list(range(1, n_est + 1))
+    train_scores = []
+    test_scores = []
+
+    for n_estimators in estimator_counts:
+        bagging_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=n_estimators,
+                                        max_samples=0.67, max_features=0.67,
+                                        bootstrap=True, random_state=9)
+        bagging_clf.fit(X_train, y_train)
+
+        train_scores.append(accuracy_score(y_train, bagging_clf.predict(X_train)))
+        test_scores.append(accuracy_score(y_test, bagging_clf.predict(X_test)))
+
+    # Plain lists are plotted: a dict_values view is not a sequence that
+    # matplotlib can line up against the x-axis.
+    plt.plot(estimator_counts, train_scores)
+    plt.plot(estimator_counts, test_scores)
+    plt.show()
+
+# bagging(X_train, X_test, y_train, y_test,n_est)
+
 
 
 
diff --git a/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..551c9a2
Binary files /dev/null and b/q01_bagging/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc
new file mode 100644
index 0000000..8b974b1
Binary files /dev/null and b/q01_bagging/tests/__pycache__/test_q01_bagging.cpython-36.pyc differ
diff --git a/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..80422b1
Binary files /dev/null and b/q02_stacking_clf/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_stacking_clf/__pycache__/build.cpython-36.pyc b/q02_stacking_clf/__pycache__/build.cpython-36.pyc
new file mode 100644
index 0000000..7ab3310
Binary files /dev/null and b/q02_stacking_clf/__pycache__/build.cpython-36.pyc differ
diff --git a/q02_stacking_clf/build.py b/q02_stacking_clf/build.py
index 7b1c5f8..33becc4 100644
--- a/q02_stacking_clf/build.py
+++ b/q02_stacking_clf/build.py
@@ -1,3 +1,4 @@
+# %load q02_stacking_clf/build.py
 # Default imports
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier
@@ -14,5 +15,53 @@
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)
 
 
+clf1 = LogisticRegression(random_state=9)
+clf2 = DecisionTreeClassifier(random_state=9)
+clf3 = DecisionTreeClassifier(max_depth=9, random_state=9)
+
+bagging_clf1 = BaggingClassifier(clf2, n_estimators=100, max_samples=100,
+                                 bootstrap=True, random_state=9, oob_score=True)
+bagging_clf2 = BaggingClassifier(clf1, n_estimators=100, max_samples=100,
+                                 bootstrap=True, random_state=9, oob_score=True)
+bagging_clf3 = BaggingClassifier(clf3, n_estimators=100, max_samples=100,
+                                 bootstrap=True, random_state=9, oob_score=True)
+
+model = [bagging_clf1, bagging_clf2, bagging_clf3]
+
 # Write your code here
+def stacking_clf(model, X_train, y_train, X_test, y_test):
+    """Fit a stacked ensemble and return its accuracy on the test split.
+
+    Every estimator in ``model`` is fitted on the training split; its class
+    predictions for the train and test rows become one feature column each.
+    A LogisticRegression meta-classifier is fitted on the train-side columns
+    and scored on the test-side columns.
+
+    Args:
+        model: iterable of estimators exposing ``fit`` and ``predict``;
+            they are fitted in place.
+        X_train, y_train: data used for the base estimators and the
+            meta-classifier.
+        X_test, y_test: data the returned score is computed on.
+
+    Returns:
+        The meta-classifier's ``score`` on the test split, unadjusted.
+    """
+    train_meta = pd.DataFrame()
+    test_meta = pd.DataFrame()
+    for position, clf in enumerate(model):
+        clf.fit(X_train, y_train)
+        # Hard class predictions (not probabilities) are the meta-features.
+        train_meta[str(position)] = clf.predict(X_train)
+        test_meta[str(position)] = clf.predict(X_test)
+
+    meta_classifier = LogisticRegression()
+    meta_classifier.fit(train_meta, y_train)
+
+    # Report the measured score as-is; a constant offset would misstate the
+    # result and could push it above 1.0.
+    return meta_classifier.score(test_meta, y_test)
+
+stacking_clf(model,X_train,y_train,X_test,y_test)
+
 
diff --git a/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..0952685
Binary files /dev/null and b/q02_stacking_clf/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc b/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc
new file mode 100644
index 0000000..9563109
Binary files /dev/null and b/q02_stacking_clf/tests/__pycache__/test_q02_stacking_clf.cpython-36.pyc differ