# Reconstructed from the only textual hunk in this patch: the lines added to
# q01_pipeline/build.py. Every other entry in the patch is a compiled .pyc
# file reported as binary and carries no reviewable code.
# NOTE(review): pd, LabelEncoder, RandomForestClassifier, train_test_split,
# GridSearchCV and roc_auc_score are used here but not imported inside the
# hunk; presumably build.py imports them above it -- confirm.

bank = pd.read_csv('data/Bank_data_to_class.csv', sep=',')

# Write your solution here :
le = LabelEncoder()

model = RandomForestClassifier(random_state=9, class_weight='balanced', n_estimators=50)

# Integer-encode every object-typed column in place (the target too, when it
# is stored as text). The single encoder is refit for each column, so after
# the loop `le` only remembers the classes of the last column it saw.
for column in bank.select_dtypes(include=['object']).columns.values:
    bank[column] = le.fit_transform(bank[column])

# Select the features by dropping the target by name, so 'y' can never end up
# in the feature matrix regardless of where that column sits in the CSV.
X_train, X_test, y_train, y_test = train_test_split(
    bank.drop('y', axis=1), bank['y'], test_size=0.3, random_state=9)


def pipeline(X_train, X_test, y_train, y_test, model):
    """Grid-search ``model`` on the training split and score it on the test split.

    The search covers ``max_depth``, ``max_features`` and ``max_leaf_nodes``;
    cross-validation and scoring are left at GridSearchCV's defaults.

    Args:
        X_train: Training feature matrix.
        X_test: Held-out feature matrix.
        y_train: Training labels.
        y_test: Held-out labels.
        model: Estimator handed to GridSearchCV as its ``estimator``.

    Returns:
        A ``(gs, auc)`` tuple: the fitted GridSearchCV object and the value of
        ``roc_auc_score`` computed from its predictions on ``X_test``.
    """
    param_grid = {
        "max_depth": [2, 3, 4, 5, 6, 10, 15, 20, 30],
        "max_features": [8, 10, 12, 14],
        "max_leaf_nodes": [2, 5, 10, 15, 20],
    }
    gs = GridSearchCV(estimator=model, param_grid=param_grid, verbose=1)
    gs.fit(X_train, y_train)

    # NOTE(review): the AUC is computed from hard class predictions rather
    # than predict_proba scores. Kept as-is because switching would change
    # the number returned to existing callers -- confirm which is intended.
    auc = roc_auc_score(y_test, gs.predict(X_test))

    # Return the already-fitted search object. Calling gs.fit(...) again here
    # would repeat the entire grid search only to hand back the same object.
    return gs, auc