diff --git a/__init__.pyc b/__init__.pyc
index d1c94d3..616b480 100644
Binary files a/__init__.pyc and b/__init__.pyc differ
diff --git a/q01_pipeline/__init__.pyc b/q01_pipeline/__init__.pyc
index b360a57..55ce5a8 100644
Binary files a/q01_pipeline/__init__.pyc and b/q01_pipeline/__init__.pyc differ
diff --git a/q01_pipeline/build.py b/q01_pipeline/build.py
index 96beca7..1ef34c7 100644
--- a/q01_pipeline/build.py
+++ b/q01_pipeline/build.py
@@ -1,3 +1,4 @@
+# %load q01_pipeline/build.py
 import pandas as pd
 import numpy as np
 from sklearn.model_selection import train_test_split, GridSearchCV
@@ -6,8 +7,28 @@
 from sklearn.utils.class_weight import compute_class_weight
 from sklearn.metrics import roc_auc_score
 
+
+
 bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',')
 
-# Write your solution here :
+#Label encode
+le = LabelEncoder()
+for col in bank.select_dtypes(include=["object"]).columns.values:
+    bank[col] = le.fit_transform(bank[col])
+
+param_grid = {"n_estimators": [10, 50, 120],
+              "max_depth": [40, 20, 10],
+              "max_leaf_nodes": [5, 10, 2]}
+X = bank.iloc[:,:-1]
+y = bank['y']
+X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state=9)
+
+model = RandomForestClassifier(random_state=9,class_weight='balanced')
+
+def pipeline(X_train,X_test,y_train,y_test,model):
+    gscv = GridSearchCV(model, param_grid,cv=5)
+    gscv.fit(X_train,y_train)
+    y_pred = gscv.predict(X_test)
+    return gscv, roc_auc_score(y_test,y_pred).item()
 
 
diff --git a/q01_pipeline/build.pyc b/q01_pipeline/build.pyc
index 5a9b3ad..88e3b86 100644
Binary files a/q01_pipeline/build.pyc and b/q01_pipeline/build.pyc differ
diff --git a/q01_pipeline/tests/__init__.pyc b/q01_pipeline/tests/__init__.pyc
index b2f2c5b..0e2fbc0 100644
Binary files a/q01_pipeline/tests/__init__.pyc and b/q01_pipeline/tests/__init__.pyc differ
diff --git a/q01_pipeline/tests/test_q01_pipeline.pyc b/q01_pipeline/tests/test_q01_pipeline.pyc
index 109bce3..14b3614 100644
Binary files a/q01_pipeline/tests/test_q01_pipeline.pyc and b/q01_pipeline/tests/test_q01_pipeline.pyc differ