# %load q01_my_decision_regressor/build.py
# Grid-search a decision-tree regressor on the house-pricing data set.

# default imports
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

data = pd.read_csv('./data/house_pricing.csv')
# Every column but the last is a feature; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

param_grid = {'max_depth': [2, 3, 5, 6, 8, 10, 15, 20, 30, 50],
              'max_leaf_nodes': [2, 3, 4, 5, 10, 15, 20],
              'max_features': [4, 8, 20, 25]}


def my_decision_regressor(X_train, X_test, y_train, y_test, param_grid):
    """Tune a ``DecisionTreeRegressor`` with an exhaustive 5-fold grid search.

    Args:
        X_train: Training features.
        X_test: Held-out features used only for the final score.
        y_train: Training targets.
        y_test: Held-out targets.
        param_grid: Mapping of estimator parameter name to the list of
            candidate values handed to ``GridSearchCV``.

    Returns:
        A ``(score, best_params)`` tuple: the fitted search object's score on
        the test split (for a regressor with no explicit ``scoring`` this is
        R^2) and the winning parameter combination.
    """
    search = GridSearchCV(DecisionTreeRegressor(random_state=9), param_grid, cv=5)
    search.fit(X_train, y_train)
    # The search refits the best estimator, so scoring goes straight through it;
    # no separate predict() call or estimator lookup is needed.
    return search.score(X_test, y_test), search.best_params_


if __name__ == '__main__':
    # Demo run only when executed as a script/notebook cell, so that importing
    # this module does not trigger a full grid search.
    print(my_decision_regressor(X_train, X_test, y_train, y_test, param_grid))
# q02_decision_regressor_plot/build.py
# Plot train/test mean squared error of a decision-tree regressor per depth.
#
# The import header of this script is only partly visible in the patch, so
# every name used below is imported explicitly here.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

plt.switch_backend('agg')

data = pd.read_csv('./data/house_pricing.csv')
# Every column but the last is a feature; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

depths = [2, 8, 10, 15, 20, 25, 30, 35, 45, 50, 80]


def decision_regressor_plot(X_train, X_test, y_train, y_test, depths):
    """Plot training and test MSE of a decision tree for each ``max_depth``.

    One ``DecisionTreeRegressor`` is fitted per entry of ``depths``; its mean
    squared error on the training split and on the test split is recorded and
    both curves are drawn on the current matplotlib figure.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets.
        y_test: Held-out targets.
        depths: Iterable of ``max_depth`` values, used as the x axis.

    Returns:
        None. The only effect is drawing on the current pyplot figure.
    """
    mse_train = []
    mse_test = []

    for depth in depths:
        # Fixed seed (same value the sibling scripts use) keeps the curves
        # reproducible between runs.
        tree = DecisionTreeRegressor(max_depth=depth, random_state=9)
        tree.fit(X_train, y_train)
        mse_train.append(mean_squared_error(y_train, tree.predict(X_train)))
        mse_test.append(mean_squared_error(y_test, tree.predict(X_test)))

    # Label the curves so the two lines can be told apart.
    plt.plot(depths, mse_train, label='train')
    plt.plot(depths, mse_test, label='test')
    plt.xlabel('max_depth')
    plt.ylabel('mean squared error')
    plt.legend()


if __name__ == '__main__':
    # Demo run only when executed directly; importing the module stays cheap.
    decision_regressor_plot(X_train, X_test, y_train, y_test, depths)
# %load q03_my_decision_classifier/build.py
# Randomized hyper-parameter search for a decision-tree classifier.
#
# The import header of this script is only partly visible in the patch, so
# every name used below is imported explicitly here.

# default imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

data = pd.read_csv('./data/loan_prediction.csv')
# RandomizedSearchCV below is created without random_state, so it samples from
# NumPy's global generator; this seed is what makes the search repeatable.
np.random.seed(9)
# Every column but the last is a feature; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

param_grid = {'max_depth': [8, 10, 15, 20],
              'max_leaf_nodes': [2, 5, 9, 15, 20],
              'max_features': [1, 2, 3, 5]}


def my_decision_classifier(X_train, X_test, y_train, y_test, param_grid, n_iter_search=10):
    """Tune a ``DecisionTreeClassifier`` with a randomized parameter search.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training labels.
        y_test: Held-out labels.
        param_grid: Mapping of parameter name to candidate values, used as the
            search's parameter distributions.
        n_iter_search: Number of parameter settings to sample. It is passed to
            the search as ``n_iter``.

    Returns:
        A ``(accuracy, best_params)`` tuple: accuracy of the refit best model
        on the test split and the parameter setting that won the search.
    """
    search = RandomizedSearchCV(
        DecisionTreeClassifier(random_state=9),
        param_grid,
        n_iter=n_iter_search,  # honour the caller's budget instead of a fixed 10
    )
    search.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, search.predict(X_test))
    return accuracy, search.best_params_


if __name__ == '__main__':
    # Demo run only when executed directly. Keeping it out of import time also
    # leaves the seeded global RNG untouched for the first real caller.
    print(my_decision_classifier(X_train, X_test, y_train, y_test, param_grid, n_iter_search=10))
# %load q04_decision_classifier_plot/build.py
# Plot train/test accuracy of a tuned decision-tree classifier per depth.
#
# The import header of this script is only partly visible in the patch, so
# every name used below is imported explicitly here.

# default imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

plt.switch_backend('agg')

data = pd.read_csv('./data/loan_prediction.csv')
# RandomizedSearchCV below is created without random_state, so it samples from
# NumPy's global generator; this seed is what makes the search repeatable.
np.random.seed(9)
# Every column but the last is a feature; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

param_grid = {'max_depth': [8, 10, 15, 20],
              'max_leaf_nodes': [2, 5, 9, 15, 20],
              'max_features': [1, 2, 3, 5]}

depth_list = [8, 10, 15, 20, 50, 100, 120, 150, 175, 200]

# Kept at module level for backward compatibility: after each call they hold
# the train / test accuracies of that call only (they no longer accumulate).
res = []
acc_list = []


def decision_classifier_plot(X_train, X_test, y_train, y_test, depth_list):
    """Plot training and test accuracy of a tuned tree for each ``max_depth``.

    For every depth a ``DecisionTreeClassifier`` with that ``max_depth`` is
    tuned by a 10-sample ``RandomizedSearchCV`` over the module-level
    ``param_grid`` (minus its ``max_depth`` entry), and the refit best model
    is scored on both splits. Both accuracy curves are drawn on the current
    matplotlib figure.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training labels.
        y_test: Held-out labels.
        depth_list: Iterable of ``max_depth`` values, used as the x axis.

    Returns:
        None. Effects: draws on the current pyplot figure and overwrites the
        module-level ``res`` (train accuracies) and ``acc_list`` (test
        accuracies) with this call's results.
    """
    # The search applies each sampled setting to a clone of the estimator, so a
    # sampled 'max_depth' would replace the depth under study; leave it out.
    search_space = {name: values for name, values in param_grid.items()
                    if name != 'max_depth'}

    # Local accumulators: a second call must not append to the first call's
    # results, otherwise x and y lengths diverge and plotting fails.
    train_scores = []
    test_scores = []

    for depth in depth_list:
        tree = DecisionTreeClassifier(max_depth=depth, random_state=9)
        search = RandomizedSearchCV(tree, search_space, n_iter=10)
        search.fit(X_train, y_train)
        train_scores.append(accuracy_score(y_train, search.predict(X_train)))
        test_scores.append(accuracy_score(y_test, search.predict(X_test)))

    # Mirror the results into the legacy module-level lists, in place.
    res[:] = train_scores
    acc_list[:] = test_scores

    plt.plot(depth_list, train_scores, label='train')
    plt.plot(depth_list, test_scores, label='test')
    plt.xlabel('max_depth')
    plt.ylabel('accuracy')
    plt.legend()


if __name__ == '__main__':
    # Demo run only when executed directly; importing the module stays cheap.
    decision_classifier_plot(X_train, X_test, y_train, y_test, depth_list)