commit-live-students · Akashdesarda · Dec 6, 2018 · Dec 6, 2018 · Dec 6, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_missing_value/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_missing_value/__pycache__/build.cpython-36.pyc b/q01_missing_value/__pycache__/build.cpython-36.pyc
diff --git a/q01_missing_value/build.py b/q01_missing_value/build.py
@@ -1,10 +1,23 @@
+# %load q01_missing_value/build.py
 # Default imports
 import pandas as pd
-
+from sklearn.preprocessing import Imputer
 # Data loading
 ny_housing = pd.read_csv('data/train.csv')
 # Selecting 4 most relevant variables along with target variable from the dataset fot the Cleaning and Preprocessing.
 housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']]
 
 
 # Write your code here:
+def imputation(housing_data):
+    """Fill missing values in the selected housing columns.
+
+    Numeric columns ('MasVnrArea', 'GrLivArea', 'SalePrice') have their
+    missing entries replaced by the column mean; categorical columns
+    ('LotShape', 'GarageType') have them replaced by the most frequent value.
+
+    Parameters
+    ----------
+    housing_data : pandas.DataFrame
+        Frame containing at least the five columns named above. It is not
+        modified; all work happens on a copy.
+
+    Returns
+    -------
+    tuple of (pandas.DataFrame, pandas.DataFrame)
+        The imputed numeric columns (as float64) followed by the imputed
+        categorical columns.
+    """
+    numeric_cols = ['MasVnrArea', 'GrLivArea', 'SalePrice']
+    categorical_cols = ['LotShape', 'GarageType']
+
+    # Copy first: assigning columns on the caller's frame would mutate it
+    # (and triggers chained-assignment warnings when that frame is a slice).
+    df = housing_data.copy()
+
+    for col in numeric_cols:
+        # float64 keeps the result dtype identical to the array the previous
+        # Imputer-based implementation wrote back, even for integer columns.
+        values = df[col].astype('float64')
+        df[col] = values.fillna(values.mean())
+
+    for col in categorical_cols:
+        modes = df[col].mode()
+        # mode() is empty when every entry is missing; nothing to fill with.
+        if not modes.empty:
+            df[col] = df[col].fillna(modes.iloc[0])
+
+    return df[numeric_cols], df[categorical_cols]
+#imputation(housing_data)
+
+
diff --git a/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc
diff --git a/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_outlier_removal/__pycache__/build.cpython-36.pyc b/q02_outlier_removal/__pycache__/build.cpython-36.pyc
diff --git a/q02_outlier_removal/build.py b/q02_outlier_removal/build.py
@@ -1,3 +1,4 @@
+# %load q02_outlier_removal/build.py
 # Default imports
 import pandas as pd
 
@@ -8,3 +9,13 @@
 
 
 # Write your code here:
+def outlier_removal(df, quantile=0.95):
+    """Drop rows holding an outlier in any float64/int64 column.
+
+    A value counts as an outlier when it is strictly greater than its
+    column's `quantile` (computed once on the unfiltered frame). Missing
+    values never compare greater, so rows with NaN are kept.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input frame; it is not modified.
+    quantile : float, optional
+        Quantile used as the per-column upper threshold (default 0.95).
+
+    Returns
+    -------
+    pandas.DataFrame
+        The rows of `df` with no value above the threshold, all columns kept.
+    """
+    numeric = df.select_dtypes(include=['float64', 'int64'])
+    if numeric.shape[1] == 0:
+        # No numeric columns means there is nothing to filter on.
+        return df
+
+    thresholds = numeric.quantile(quantile)
+    # The comparison aligns the threshold Series with the column labels.
+    is_outlier = (numeric > thresholds).any(axis=1)
+    # Filter positionally rather than dropping by index label, so rows that
+    # merely share a (duplicated) label with an outlier row are not lost.
+    return df[~is_outlier.values]
+
+
diff --git a/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc
diff --git a/q03_skewness_log/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_skewness_log/__pycache__/build.cpython-36.pyc b/q03_skewness_log/__pycache__/build.cpython-36.pyc
diff --git a/q03_skewness_log/build.py b/q03_skewness_log/build.py
@@ -1,3 +1,4 @@
+# %load q03_skewness_log/build.py
 from scipy.stats import skew
 import pandas as pd
 import numpy as np
@@ -6,3 +7,13 @@
 
 
 # Write code here:
+def skewness_log(data):
+    """Log-transform 'GrLivArea' and 'SalePrice' and return their skewness.
+
+    The natural log of each column is stored in `data` itself under the
+    original name plus an 'LT' suffix ('GrLivAreaLT', 'SalePriceLT').
+
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        Frame with 'GrLivArea' and 'SalePrice' columns; two columns are
+        added to it in place.
+
+    Returns
+    -------
+    tuple
+        Skewness of the transformed 'GrLivArea' column, then of the
+        transformed 'SalePrice' column.
+    """
+    skews = []
+    for source_col in ('GrLivArea', 'SalePrice'):
+        target_col = source_col + 'LT'
+        data[target_col] = np.log(data[source_col])
+        skews.append(skew(data[target_col]))
+    return skews[0], skews[1]
+
+
diff --git a/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc