Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
20 changes: 16 additions & 4 deletions q01_my_decision_regressor/build.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
# %load q01_my_decision_regressor/build.py
# default imports
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the house-pricing dataset from the repository's data directory.
# NOTE(review): the read below appears twice, differing only in quote style;
# this looks like the removed and added sides of a diff rendered together.
# The second assignment simply overwrites the first.
data = pd.read_csv("./data/house_pricing.csv")
data = pd.read_csv('./data/house_pricing.csv')
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
# Hold out 30% of the rows for testing, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

# Candidate hyper-parameter values handed to the search in my_decision_regressor.
# NOTE(review): as with the read above, the grid is defined twice with identical
# values (double- vs single-quoted keys); the second definition wins.
param_grid = {"max_depth": [2, 3, 5, 6, 8, 10, 15, 20, 30, 50],
"max_leaf_nodes": [2, 3, 4, 5, 10, 15, 20],
"max_features": [4, 8, 20, 25]}
param_grid = {'max_depth': [2, 3, 5, 6, 8, 10, 15, 20, 30, 50],
'max_leaf_nodes': [2, 3, 4, 5, 10, 15, 20],
'max_features': [4, 8, 20, 25]}

# Write your solution here :
def my_decision_regressor(X_train, X_test, y_train, y_test, param_grid):
    """Grid-search a decision-tree regressor and score it on the test split.

    A ``DecisionTreeRegressor`` (``random_state=9``) is tuned with
    ``GridSearchCV`` using 5-fold cross-validation over ``param_grid``,
    fitted on the training data, and then used to predict ``X_test``.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training target values.
        y_test: Test target values.
        param_grid: Mapping of parameter name to the list of values to try.

    Returns:
        A tuple ``(r2, best_params)``: the R^2 score of the search's
        predictions on the test split, and the search's ``best_params_``.
    """
    searcher = GridSearchCV(
        DecisionTreeRegressor(random_state=9),
        param_grid=param_grid,
        cv=5,
    )
    searcher.fit(X_train, y_train)

    test_r2 = r2_score(y_test, searcher.predict(X_test))
    return test_r2, searcher.best_params_



Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
28 changes: 26 additions & 2 deletions q02_decision_regressor_plot/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q02_decision_regressor_plot/build.py
# default imports
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
Expand All @@ -6,12 +7,35 @@
import matplotlib.pyplot as plt
import numpy as np
# Select matplotlib's 'agg' backend before any figure is drawn.
# NOTE(review): 'agg' is presumably chosen so the module runs without a
# display (e.g. under automated tests) — confirm.
plt.switch_backend('agg')

# Load the house-pricing dataset.
# NOTE(review): the read appears twice, differing only in quote style; this
# looks like the removed and added sides of a diff rendered together. The
# second assignment overwrites the first.
data = pd.read_csv("./data/house_pricing.csv")
data = pd.read_csv('./data/house_pricing.csv')
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
# Hold out 30% of the rows for testing, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

# Tree depths evaluated (and used as x-axis values) by decision_regressor_plot.
depth_list = [2, 8, 10, 15, 20, 25, 30, 35, 45, 50, 80]

# Write your solution here :
def decision_regressor_plot(X_train, X_test, y_train, y_test, depth_list):
    """Plot train and test mean squared error of a decision tree per depth.

    For every value in ``depth_list`` a ``DecisionTreeRegressor`` with that
    ``max_depth`` (and ``random_state=9``) is fitted on the training data.
    Its mean squared error on the training and test splits is recorded, and
    both error curves are drawn against ``depth_list`` on the current
    matplotlib figure.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training target values.
        y_test: Test target values.
        depth_list: Iterable of ``max_depth`` values to evaluate; also used
            as the x-axis values of the plot.

    Returns:
        None. The function only draws on the current pyplot figure.
    """
    # NOTE(review): mean_squared_error is not imported in the visible part of
    # this file; presumably it comes from the collapsed import section.
    errors_train = []
    errors_test = []
    # Iterate over the depths directly rather than indexing via range(len(...)).
    for depth in depth_list:
        model = DecisionTreeRegressor(max_depth=depth, random_state=9)
        model.fit(X_train, y_train)

        errors_train.append(mean_squared_error(y_train, model.predict(X_train)))
        errors_test.append(mean_squared_error(y_test, model.predict(X_test)))

    plt.plot(depth_list, errors_train, label='Train Error')
    plt.plot(depth_list, errors_test, label='Test Error')
    plt.legend()
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Max Depth')
    # NOTE(review): the module switches to the 'agg' backend, under which
    # show() is not expected to open a window — confirm this is intended.
    plt.show()
# NOTE(review): this appears to be a module-level call, so the trees are
# fitted and the plot is drawn every time the module is imported or run —
# confirm that this import-time side effect is wanted.
decision_regressor_plot(X_train,X_test,y_train,y_test,depth_list)



Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
18 changes: 14 additions & 4 deletions q03_my_decision_classifier/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_my_decision_classifier/build.py
# default imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
Expand All @@ -6,16 +7,25 @@
import pandas as pd
import numpy as np

# Load the loan-prediction dataset.
# NOTE(review): the read appears twice, differing only in quote style; this
# looks like the removed and added sides of a diff rendered together. The
# second assignment overwrites the first.
data = pd.read_csv("./data/loan_prediction.csv")
data = pd.read_csv('./data/loan_prediction.csv')
# Seed NumPy's global random generator.
# NOTE(review): presumably this is what makes the randomized search below
# repeatable, since no random_state is passed to it — confirm.
np.random.seed(9)
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
# Hold out 30% of the rows for testing, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

# Candidate hyper-parameter values sampled by my_decision_classifier.
# NOTE(review): the grid is defined twice with identical values (double- vs
# single-quoted keys); the second definition wins.
param_grid = {"max_depth": [8, 10, 15, 20],
"max_leaf_nodes": [2, 5, 9, 15, 20],
"max_features": [1, 2, 3, 5]}
param_grid = {'max_depth': [8, 10, 15, 20],
'max_leaf_nodes': [2, 5, 9, 15, 20],
'max_features': [1, 2, 3, 5]}


# Write your solution here :
def my_decision_classifier(X_train, X_test, y_train, y_test, param_grid, n_iter_search=10):
    """Randomized-search a decision-tree classifier and score it on the test split.

    A ``DecisionTreeClassifier`` (``random_state=9``) is tuned with
    ``RandomizedSearchCV``, which samples ``n_iter_search`` parameter
    settings from ``param_grid``. The search is fitted on the training data
    and then used to predict ``X_test``.

    No ``random_state`` is passed to the search itself.
    NOTE(review): the sampled settings therefore presumably depend on
    NumPy's global random state at call time — confirm.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        param_grid: Mapping of parameter name to the list of values to
            sample from.
        n_iter_search: Number of parameter settings to sample.

    Returns:
        A tuple ``(accuracy, best_params)``: the accuracy of the search's
        predictions on the test split, and the search's ``best_params_``.
    """
    search = RandomizedSearchCV(
        DecisionTreeClassifier(random_state=9),
        param_distributions=param_grid,
        n_iter=n_iter_search,
    )
    search.fit(X_train, y_train)

    test_predictions = search.predict(X_test)
    test_accuracy = accuracy_score(y_test, test_predictions)
    return test_accuracy, search.best_params_




Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
22 changes: 21 additions & 1 deletion q04_decision_classifier_plot/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q04_decision_classifier_plot/build.py
# default imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
Expand All @@ -8,7 +9,7 @@
import numpy as np
# Select matplotlib's 'agg' backend before any figure is drawn.
# NOTE(review): 'agg' is presumably chosen so the module runs without a
# display (e.g. under automated tests) — confirm.
plt.switch_backend('agg')

# Load the loan-prediction dataset.
# NOTE(review): the read appears twice, differing only in quote style; this
# looks like the removed and added sides of a diff rendered together. The
# second assignment overwrites the first.
data = pd.read_csv("./data/loan_prediction.csv")
data = pd.read_csv('./data/loan_prediction.csv')
# Seed NumPy's global random generator.
np.random.seed(9)
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
Expand All @@ -18,3 +19,22 @@


# Write your solution here :
def decision_classifier_plot(X_train, X_test, y_train, y_test, depth_list):
    """Plot train and test accuracy of a decision tree per depth.

    For every value in ``depth_list`` a ``DecisionTreeClassifier`` with that
    ``max_depth`` (and ``random_state=9``) is fitted on the training data.
    Its accuracy on the training and test splits is recorded, and both
    curves are drawn against ``depth_list`` on the current matplotlib
    figure. The curves and axes are labelled so the two lines can be told
    apart.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        depth_list: Iterable of ``max_depth`` values to evaluate; also used
            as the x-axis values of the plot.

    Returns:
        None. The function only draws on the current pyplot figure.
    """
    # The recorded values are accuracy scores (higher is better), not errors,
    # so the lists are named for what they actually hold.
    accuracy_train = []
    accuracy_test = []
    # Iterate over the depths directly rather than indexing via range(len(...)).
    for depth in depth_list:
        classifier = DecisionTreeClassifier(max_depth=depth, random_state=9)
        classifier.fit(X_train, y_train)

        accuracy_train.append(accuracy_score(y_train, classifier.predict(X_train)))
        accuracy_test.append(accuracy_score(y_test, classifier.predict(X_test)))

    # Test curve is drawn first, then train, preserving the original draw order.
    plt.plot(depth_list, accuracy_test, label='Test Accuracy')
    plt.plot(depth_list, accuracy_train, label='Train Accuracy')
    plt.legend()
    plt.ylabel('Accuracy')
    plt.xlabel('Max Depth')
    # NOTE(review): the module switches to the 'agg' backend, under which
    # show() is not expected to open a window — confirm this is intended.
    plt.show()


Binary file not shown.
Binary file not shown.