Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added q01_bagging/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added q01_bagging/__pycache__/build.cpython-36.pyc
Binary file not shown.
24 changes: 22 additions & 2 deletions q01_bagging/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,25 @@


# Write your code here


def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of a bagged decision tree vs. ensemble size.

    For every estimator count in ``range(1, n_est, 2)`` (1, 3, 5, ...,
    strictly below ``n_est``) a ``BaggingClassifier`` wrapping a decision
    tree is fitted on the training split and its accuracy is measured on
    both splits. The two accuracy curves are then drawn and displayed.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Exclusive upper bound on the number of estimators tried.

    Returns:
        None. The only output is the displayed plot.
    """
    dtree = DecisionTreeClassifier(random_state=9)
    estimator_arr = range(1, n_est, 2)
    lst_train_score = []
    lst_test_score = []

    for est_val in estimator_arr:
        # The base estimator is passed positionally on purpose: the keyword
        # was called ``base_estimator`` in older scikit-learn releases and
        # ``estimator`` in newer ones, while the first positional slot is
        # the same in both.
        bagging_clf = BaggingClassifier(
            dtree,
            n_estimators=est_val,
            max_samples=0.67,
            max_features=0.67,
            bootstrap=True,
            random_state=9,
        )
        bagging_clf.fit(X_train, y_train)

        y_pred_test = bagging_clf.predict(X_test)
        y_pred_train = bagging_clf.predict(X_train)

        lst_train_score.append(
            accuracy_score(y_true=y_train, y_pred=y_pred_train)
        )
        lst_test_score.append(
            accuracy_score(y_true=y_test, y_pred=y_pred_test)
        )

    plt.plot(estimator_arr, lst_train_score, color='b', label='Train Accuracy')
    plt.plot(estimator_arr, lst_test_score, color='g', label='Test Accuracy')
    plt.xlabel('Number of Estimators')
    plt.ylabel('Accuracy Score')
    plt.legend()
    plt.show()
Binary file not shown.
Binary file not shown.
Binary file added q02_stacking_clf/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added q02_stacking_clf/__pycache__/build.cpython-36.pyc
Binary file not shown.
25 changes: 25 additions & 0 deletions q02_stacking_clf/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,29 @@
# Hold out 30% of the samples as the test split (test_size=0.3), using a fixed random_state of 9.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

# Write your code here
def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Stack base classifiers under a logistic-regression meta-classifier.

    Each base model is fitted on ``(X_train, y_train)``. Its class
    probabilities on the train and test features become the meta-features,
    placed side by side in the order the models are given. A
    ``LogisticRegression`` is fitted on the train meta-features and scored
    on the test meta-features.

    Args:
        model: Iterable of classifiers exposing ``fit`` and
            ``predict_proba``. They are fitted in place.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Accuracy of the meta-classifier on the test split.

    Raises:
        ValueError: If ``model`` yields no classifiers.
    """
    train_blocks = []
    test_blocks = []
    for model_ in model:
        # Fit each base model on the full training split.
        model_.fit(X_train, y_train)

        # Meta-features are predicted probabilities, not predicted classes.
        # Original author's note: mlxtend stacks on the predicted classes
        # instead, and with it the accuracy score is 0.74054054054054053.
        # Keeping the probability of every class is not strictly needed
        # here, but the exercise's test case was written against this
        # implementation, so all probability columns are retained.
        train_blocks.append(pd.DataFrame(model_.predict_proba(X_train)))
        test_blocks.append(pd.DataFrame(model_.predict_proba(X_test)))

    if not train_blocks:
        raise ValueError("stacking_clf requires at least one base model")

    # Concatenate once, rather than growing a DataFrame inside the loop,
    # so the accumulated columns are not re-copied on every iteration.
    X_train_meta = pd.concat(train_blocks, axis=1)
    X_test_meta = pd.concat(test_blocks, axis=1)

    # Fit the logistic-regression meta-classifier on the stacked
    # probabilities and score it on the test meta-features.
    meta_logcf = LogisticRegression(random_state=9)
    meta_logcf.fit(X_train_meta, y_train)
    y_pred_meta_test = meta_logcf.predict(X_test_meta)
    acc_score = accuracy_score(y_true=y_test, y_pred=y_pred_meta_test)
    return acc_score
Binary file not shown.
Binary file not shown.