def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of a bagged decision tree against ensemble size.

    For every ensemble size in ``range(1, n_est, 2)`` a ``BaggingClassifier``
    wrapping a ``DecisionTreeClassifier(random_state=9)`` is fitted on the
    training split and scored on both splits with ``accuracy_score``. The two
    accuracy curves are then drawn with matplotlib and shown.

    ``DecisionTreeClassifier``, ``BaggingClassifier``, ``accuracy_score`` and
    ``plt`` are expected to be available at module scope.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        n_est: Exclusive upper bound on the number of estimators to try;
            sizes 1, 3, 5, ... below this value are evaluated.

    Returns:
        None. The only output is the plot.
    """
    dtree = DecisionTreeClassifier(random_state=9)
    estimator_counts = range(1, n_est, 2)
    train_scores = []
    test_scores = []

    for n_estimators in estimator_counts:
        # The base learner is passed positionally on purpose: its keyword was
        # renamed from ``base_estimator`` to ``estimator`` in newer
        # scikit-learn releases, while the first positional slot is the same
        # in both, so this call works across versions.
        bagging_clf = BaggingClassifier(
            dtree,
            n_estimators=n_estimators,
            max_samples=0.67,
            max_features=0.67,
            bootstrap=True,
            random_state=9,
        )
        bagging_clf.fit(X_train, y_train)

        y_pred_test = bagging_clf.predict(X_test)
        y_pred_train = bagging_clf.predict(X_train)

        train_scores.append(accuracy_score(y_true=y_train, y_pred=y_pred_train))
        test_scores.append(accuracy_score(y_true=y_test, y_pred=y_pred_test))

    plt.plot(estimator_counts, train_scores, color='b', label='Train Accuracy')
    plt.plot(estimator_counts, test_scores, color='g', label='Test Accuracy')
    plt.xlabel('Number of Estimators')
    plt.ylabel('Accuracy Score')
    plt.legend()
    plt.show()
def stacking_clf(model, X_train, y_train, X_test, y_test):
    """Stack base-model probabilities and score a logistic meta-classifier.

    Each estimator in ``model`` is fitted in place on the training split. Its
    ``predict_proba`` output for the training and test splits contributes one
    group of meta-feature columns. A ``LogisticRegression(random_state=9)`` is
    trained on the training meta-features, and its accuracy on the test
    meta-features is returned.

    ``pd``, ``LogisticRegression`` and ``accuracy_score`` are expected to be
    available at module scope.

    Note:
        The training meta-features are predicted on the same rows the base
        models were fitted on; no out-of-fold predictions are used.

    Args:
        model: Iterable of estimators exposing ``fit`` and ``predict_proba``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        The accuracy of the meta-classifier on the test split, as computed by
        ``accuracy_score``.

    Raises:
        ValueError: If ``model`` yields no estimators.
    """
    train_frames = []
    test_frames = []

    for base_model in model:
        base_model.fit(X_train, y_train)
        # Every class-probability column is kept, not just one class. Per the
        # original author's note this is redundant for a two-class problem,
        # but the exercise's test case expects this exact implementation
        # (mlxtend stacks predicted classes instead and scores differently).
        train_frames.append(pd.DataFrame(base_model.predict_proba(X_train)))
        test_frames.append(pd.DataFrame(base_model.predict_proba(X_test)))

    if not train_frames:
        raise ValueError("stacking_clf requires at least one base model")

    # Concatenate once per split rather than growing a DataFrame inside the
    # loop, which would re-copy all accumulated columns on every iteration.
    X_train_meta = pd.concat(train_frames, axis=1)
    X_test_meta = pd.concat(test_frames, axis=1)

    # Meta-classifier: logistic regression on the stacked probabilities.
    meta_clf = LogisticRegression(random_state=9)
    meta_clf.fit(X_train_meta, y_train)

    y_pred_meta_test = meta_clf.predict(X_test_meta)
    return accuracy_score(y_true=y_test, y_pred=y_pred_meta_test)