commit-live-students · vivekshingate · Dec 11, 2018 · Dec 12, 2018 · Dec 13, 2018 · Dec 13, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc
diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py
@@ -1,16 +1,22 @@
+# %load q01_plot_corr/build.py
 # Default imports
 import pandas as pd
-from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
-plt.switch_backend('agg')
+from matplotlib.pyplot import yticks, xticks, subplots, set_cmap,show
+# plt.switch_backend('agg')
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
 
 # Write your solution here:
 def plot_corr(data, size=11):
+    """Draw a size x size heatmap of data.corr(), show it, and return the Axes."""
     corr = data.corr()
     fig, ax = subplots(figsize=(size, size))
-    set_cmap("YlOrRd")
+    set_cmap('YlOrRd')
     ax.matshow(corr)
     xticks(range(len(corr.columns)), corr.columns, rotation=90)
     yticks(range(len(corr.columns)), corr.columns)
-    return ax
+    show()
+    return ax
+
+
+plot_corr(data, size=11)
diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc
diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py
@@ -1,3 +1,4 @@
+# %load q02_best_k_features/build.py
 # Default imports
 
 import pandas as pd
@@ -9,4 +10,33 @@
 
 
 # Write your solution here:
+def percentile_k_features(df, k=20):
+    """Return the top-k-percentile features of df, highest score first.
+
+    The last column of df is treated as the target; every other column is
+    a candidate feature scored against it with f_regression.
+
+    Args:
+        df: DataFrame whose final column is the regression target.
+        k: Percentile of features to keep, passed to SelectPercentile.
+
+    Returns:
+        List of the selected column names in descending score order.
+    """
+    X = df.iloc[:, :-1]
+    y = df.iloc[:, -1]
+
+    # Pass percentile by keyword: newer scikit-learn rejects it positionally.
+    selector = SelectPercentile(f_regression, percentile=k).fit(X, y)
+
+    # get_support() alone would give the columns in their original order,
+    # so rank the selected features by score before returning them.
+    scores = pd.Series(selector.scores_, index=X.columns)
+    ranked = scores[selector.get_support()].sort_values(ascending=False)
+
+    return list(ranked.index)
+
+
+percentile_k_features(df = data,k = 20)
+
 
diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc
diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py
@@ -1,11 +1,27 @@
+# %load q03_rf_rfe/build.py
 # Default imports
 import pandas as pd
-
+import numpy as np
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
 from sklearn.feature_selection import RFE
 from sklearn.ensemble import RandomForestClassifier
 
 
 # Your solution code here
+def rf_rfe(df):
+    """Select features of df by recursive feature elimination on a random forest.
+
+    Args:
+        df: DataFrame containing a 'SalePrice' column, used as the target.
+
+    Returns:
+        List of the column names RFE keeps (n_features_to_select=None).
+    """
+    # Read from the df argument, not the module-level data, so callers control the input.
+    X = df.drop(columns='SalePrice')
+    y = df['SalePrice']
+    selector = RFE(estimator=RandomForestClassifier(random_state=9), n_features_to_select=None)
+    return list(X.columns[selector.fit(X, y).support_])
+rf_rfe(data)
 
diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc
diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py
@@ -1,3 +1,4 @@
+# %load q04_select_from_model/build.py
 # Default imports
 from sklearn.feature_selection import SelectFromModel
 from sklearn.ensemble import RandomForestClassifier
@@ -6,5 +7,18 @@
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
-
 # Your solution code here
+def select_from_model(df):
+    """Return the columns of df that SelectFromModel keeps for a random forest.
+
+    df must contain a 'SalePrice' column, which is used as the target. The
+    df argument is used rather than the module-level data.
+    """
+    X = df.drop(columns='SalePrice')
+    y = df['SalePrice']
+    forest = RandomForestClassifier(random_state=9).fit(X, y)
+    # prefit=True: the selector reuses the already-fitted forest.
+    return list(X.columns[SelectFromModel(forest, prefit=True).get_support()])
+
+select_from_model(data)
+
diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc