commit-live-students · sannidh · Nov 7, 2018 · Nov 7, 2018 · Nov 7, 2018 · Nov 7, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc
diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py
@@ -1,16 +1,20 @@
+# %load q01_plot_corr/build.py
 # Default imports
 import pandas as pd
 from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
+import matplotlib.pyplot as plt
 plt.switch_backend('agg')
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
-
 # Write your solution here:
 def plot_corr(data, size=11):
+    """Draw the correlation matrix of ``data`` as a heat map and return its axes.
+
+    ``data.corr()`` is rendered with ``matshow`` on a new figure created with
+    ``figsize=(size, size)``; the correlation matrix's column names are used
+    as tick labels on both axes (x labels rotated by 90).
+    """
     corr = data.corr()
     fig, ax = subplots(figsize=(size, size))
-    set_cmap("YlOrRd")
+    set_cmap('YlOrRd')
     ax.matshow(corr)
     xticks(range(len(corr.columns)), corr.columns, rotation=90)
     yticks(range(len(corr.columns)), corr.columns)
     return ax
+
+
+
diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc
diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py
@@ -1,12 +1,20 @@
+# %load q02_best_k_features/build.py
 # Default imports
 
 import pandas as pd
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
-from sklearn.feature_selection import SelectPercentile
-from sklearn.feature_selection import f_regression
+from sklearn.feature_selection import SelectPercentile, f_regression, SelectFromModel
+
+def percentile_k_features(df, k=20):
+    """Return the names of the top-``k``-percentile features, best score first.
+
+    The last column of ``df`` is used as the target and all other columns as
+    candidate features. Features are scored with ``f_regression`` through
+    ``SelectPercentile(percentile=k)``; the surviving columns are returned as
+    a list ordered by descending score.
+    """
+    features = df.iloc[:, :-1]
+    target = df.iloc[:, -1]
+    selector = SelectPercentile(f_regression, percentile=k)
+    selector.fit(features, target)
+    kept = selector.get_support(indices=True)
+    # Key on (score, column index) so ordering, including ties, matches a
+    # descending sort of (score, index) pairs.
+    ranked = sorted(kept, key=lambda col: (selector.scores_[col], col), reverse=True)
+    return list(df.iloc[:, ranked].columns.values)
 
 
-# Write your solution here:
 
diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc
diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py
@@ -1,3 +1,4 @@
+# %load q03_rf_rfe/build.py
 # Default imports
 import pandas as pd
 
@@ -6,6 +7,17 @@
 from sklearn.feature_selection import RFE
 from sklearn.ensemble import RandomForestClassifier
 
+def rf_rfe(df):
+    """Select half of the columns with recursive feature elimination.
+
+    The last column of ``df`` is used as the target and all other columns as
+    candidate features. A ``RandomForestClassifier`` drives ``RFE``, which
+    keeps ``df.shape[1] // 2`` features.
+
+    Returns:
+        The names of the selected features as a list, in column order.
+    """
+    X, y = df.iloc[:, :-1], df.iloc[:, -1]
+    # n_features_to_select must be a keyword: passing it positionally was
+    # deprecated in scikit-learn 0.24 and raises TypeError from 1.0 onwards.
+    selector = RFE(RandomForestClassifier(), n_features_to_select=df.shape[1] // 2)
+    selector.fit(X, y)
+    # Every selected feature has ranking_ == 1, so ordering by rank is just
+    # column order, which is what get_support(indices=True) already yields.
+    idx_selected = selector.get_support(indices=True)
+    return list(df.iloc[:, idx_selected].columns.values)
+
+
+
 
-# Your solution code here
 
diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc
diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py
@@ -1,3 +1,4 @@
+# %load q04_select_from_model/build.py
 # Default imports
 from sklearn.feature_selection import SelectFromModel
 from sklearn.ensemble import RandomForestClassifier
@@ -6,5 +7,17 @@
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
+def select_from_model(df):
+    """Return the feature names kept by ``SelectFromModel`` on a random forest.
+
+    The last column of ``df`` is used as the target and all other columns as
+    candidate features. A ``RandomForestClassifier(random_state=9)`` is fitted
+    and wrapped in ``SelectFromModel(prefit=True)``.
+
+    Returns:
+        The names of the selected features as a list, in column order.
+    """
+    X, y = df.iloc[:, :-1], df.iloc[:, -1]
+    clf = RandomForestClassifier(random_state=9)
+    clf.fit(X, y)
+    # prefit=True reads the importances straight from the fitted forest, so
+    # the support mask is available without transforming X first.
+    selector = SelectFromModel(clf, prefit=True)
+    idx_selected = selector.get_support(indices=True)
+    return list(df.iloc[:, idx_selected].columns.values)
+
+
+
+
 
-# Your solution code here
diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/build.cpython-36.pyc b/q05_forward_selected/__pycache__/build.cpython-36.pyc
diff --git a/q05_forward_selected/build.py b/q05_forward_selected/build.py
@@ -1,10 +1,59 @@
+# %load q05_forward_selected/build.py
 # Default imports
+from greyatomlib.feature_selection.q05_forward_selected.build import forward_selected
 import pandas as pd
+import numpy as np
+import math
 from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
-model = LinearRegression()
+def foward_selected(df, model=None):
+    """Select features by greedy forward selection on in-sample ``model.score``.
+
+    The last column of ``df`` is the target and every other column is a
+    candidate feature. Each round fits ``model`` once per remaining candidate
+    (current selection plus that candidate) and keeps the candidate with the
+    highest score; rounds continue until every feature has been selected.
+
+    The misspelled public name is kept so existing callers keep working.
+
+    Args:
+        df: DataFrame whose last column is the target.
+        model: Estimator exposing ``fit(X, y)`` and ``score(X, y)``. A new
+            ``LinearRegression`` is created per call when omitted, so no
+            estimator instance is shared between calls.
+
+    Returns:
+        A ``(features, scores)`` tuple: the feature names in the order they
+        were added, and the score reached after each addition. Both lists
+        are empty when ``df`` has no feature columns.
+    """
+    if model is None:
+        model = LinearRegression()
+    X, y = df.iloc[:, :-1], df.iloc[:, -1]
+    # Every feature, including the last one, stays a candidate until chosen.
+    remaining = list(X.columns)
+    selected, scores = [], []
+    while remaining:
+        best_score, best_feature = None, None
+        for candidate in remaining:
+            subset = X[selected + [candidate]]
+            model.fit(subset, y)
+            score = model.score(subset, y)
+            # Strict comparison keeps the first candidate on ties.
+            if best_score is None or score > best_score:
+                best_score, best_feature = score, candidate
+        selected.append(best_feature)
+        remaining.remove(best_feature)
+        scores.append(best_score)
+    return selected, scores
+# Module-level call: runs the selection on the module-level `data` frame each
+# time this file is executed or imported; the returned tuple is discarded.
+foward_selected(data, LinearRegression())
+
+
 
 
-# Your solution code here
diff --git a/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc