import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score

bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',')
model = RandomForestClassifier(random_state=9, class_weight='balanced')

# Write your solution here :
y = bank['y']
X = bank.drop(['y'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=9)


def _encode_together(train_values, test_values):
    """Label-encode two value sequences with one shared mapping.

    The encoder is fitted on the combined values so a category gets the
    same integer code in both outputs, and a category present in only one
    of the inputs cannot make ``transform`` raise.

    Returns:
        Tuple ``(encoded_train, encoded_test)`` of integer arrays.
    """
    train_arr = np.asarray(train_values)
    test_arr = np.asarray(test_values)
    encoder = LabelEncoder().fit(np.concatenate([train_arr, test_arr]))
    return encoder.transform(train_arr), encoder.transform(test_arr)


def pipeline(X_train, X_test, y_train, y_test, model):
    """Grid-search ``model`` on the training split and score it on the test split.

    Object-dtype feature columns and the target are label-encoded, a
    ``GridSearchCV`` over ``max_depth``, ``max_leaf_nodes`` and
    ``max_features`` is fitted on the encoded training data, and ROC AUC is
    computed on the encoded test data.

    Args:
        X_train: Training features (DataFrame).
        X_test: Test features (DataFrame) with the same columns as X_train.
        y_train: Training target.
        y_test: Test target.
        model: Estimator handed to ``GridSearchCV``.

    Returns:
        Tuple ``(grid, auc)``: the fitted ``GridSearchCV`` object and the
        ROC AUC of its predictions on the test split.
    """
    param_grid = {
        'max_depth': [2, 3, 5, 6, 8, 10, 15, 20, 30],
        'max_leaf_nodes': [2, 5, 10, 15, 20],
        'max_features': [8, 10, 12, 14],
    }
    grid = GridSearchCV(estimator=model, param_grid=param_grid)

    # Work on copies: the inputs are usually slices produced by
    # train_test_split, and overwriting their columns would both mutate the
    # caller's data and trigger pandas' SettingWithCopy warning.
    X_train = X_train.copy()
    X_test = X_test.copy()

    y_train, y_test = _encode_together(y_train, y_test)

    for column in X_train.columns:
        if pd.api.types.is_object_dtype(X_train[column]):
            X_train[column], X_test[column] = _encode_together(
                X_train[column], X_test[column])

    # fit() returns the estimator itself, so fitting once is enough; a second
    # fit in the return statement would only repeat the whole search.
    grid.fit(X_train, y_train)

    # NOTE: AUC is computed from hard class predictions, not probabilities,
    # to keep the returned score identical to what callers already expect.
    auc = roc_auc_score(y_test, grid.predict(X_test))
    return grid, auc